//
// Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes
// (i.e. the hardware register number used in ModR/M fields).

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Special Registers
// EFLAGS has no ideal register type (0) and no VMReg mapping; it is only
// tracked so the allocator knows flags are killed across calls.
reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());

// XMM registers. 128-bit registers or 4 words each, labeled a-d.
// Word a in each register holds a Float, words ab hold a Double.
// We currently do not use the SIMD capabilities, so registers cd
// are unused at the moment.
reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H );

alloc_class chunk1( XMM0a, XMM0b,
                    XMM1a, XMM1b,
                    XMM2a, XMM2b,
                    XMM3a, XMM3b,
                    XMM4a, XMM4b,
                    XMM5a, XMM5b,
                    XMM6a, XMM6b,
                    XMM7a, XMM7b, EFLAGS);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for all registers
reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for general registers
reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers which may be used for implicit null checks on win95
// Also safe for use by tailjump. We don't want to allocate in rbp,
reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
// Class of "X" registers
reg_class x_reg(EBX, ECX, EDX, EAX);
// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);
// Class for general registers not including ECX
reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers not including EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
// Class for general registers not including EAX or EBX.
reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);
// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);
// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);
// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);
// Class of EDI (for synchronization)
reg_class edi_reg(EDI);
// Class of ESI (for synchronization)
reg_class esi_reg(ESI);
// Singleton class for interpreter's stack pointer
reg_class ebp_reg(EBP);
// Singleton class for stack pointer
reg_class sp_reg(ESP);
// Singleton class for instruction pointer
// reg_class ip_reg(EIP);
// Singleton class for condition codes
reg_class int_flags(EFLAGS);
// Class of integer register pairs
reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
// Not AX or DX, used in divides
reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instruction out of Intel's FP stack.
reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

// make a register class for SSE registers
reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);

// make a double register class for SSE2 registers
reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
                  XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );

reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                   FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                   FPR7L,FPR7H );

reg_class flt_reg0( FPR1L );
reg_class dbl_reg0( FPR1L,FPR1H );
reg_class dbl_reg1( FPR2L,FPR2H );
reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                       FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

// XMM6 and XMM7 could be used as temporary registers for long, float and
// double values for SSE2.
reg_class xdb_reg6( XMM6a,XMM6b );
reg_class xdb_reg7( XMM7a,XMM7b );
%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Round 'adr' down to a 16-byte boundary and store the 128-bit value
// {lo, hi} there; returns the aligned address. The caller must supply a
// buffer with at least 16 bytes of slack (see fp_signmask_pool below).
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Indices start at [1*2] so that rounding down to 16 bytes always stays
// inside the pool (the extra 128 bits at the front absorb the alignment).
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted before a call when the method runs in 24-bit
// FP precision mode (a 6-byte fldcw), zero otherwise.
static int pre_call_FPU_size() {
  if (Compile::current()->in_24_bit_fp_mode())
    return 6; // fldcw
  return 0;
}

// Size in bytes of the "mov rbp, rsp" emitted before method-handle calls.
static int preserve_SP_size() {
  return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg)
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
// Byte offset from the start of this node's emission to the return address.
// Must agree exactly with the bytes emitted for the call instruction.
int MachCallStaticJavaNode::ret_addr_offset() {
  int offset = 5 + pre_call_FPU_size(); // 5 bytes from start of call to where return address points
  if (_method_handle_invoke)
    offset += preserve_SP_size(); // account for the extra "mov rbp, rsp"
  return offset;
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_FPU_size(); // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All sequence; patched in when it is first
// emitted, hence -1 ("not yet emitted") as the sentinel.
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_FPU_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_FPU_size(); // skip fldcw, if any
  current_offset += 1; // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
  current_offset += pre_call_FPU_size(); // skip fldcw, if any
  current_offset += preserve_SP_size(); // skip mov rbp, rsp
  current_offset += 1; // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_FPU_size(); // skip fldcw, if any
  current_offset += 5; // skip MOV instruction
  current_offset += 1; // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

#ifndef PRODUCT
void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const {
  st->print("INT3");
}
#endif

// EMIT_RM()
// Emit a ModR/M byte: mode (2 bits), reg/opcode (3 bits), r/m (3 bits).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a base opcode with a condition code.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Embedded oops must not be scavengable unless ScavengeRootsInCode allows it.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oop(d32)->is_oop() && (ScavengeRootsInCode || !oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + [ESP+disp] addressing (ModR/M, SIB, and 8- or 32-bit disp).
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d8 (cbuf, disp); // Displacement // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d32(cbuf, disp); // Displacement // R/M byte
  }
}

// eRegI ereg, memory mem) %{ // emit_reg_mem
// Emit the ModR/M (and SIB/displacement) bytes for a register-memory
// operand. index==0x4 means "no index"; base==-1 means absolute address.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && !(displace_is_oop) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( displace_is_oop ) {
            emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else { // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( displace_is_oop ) {
            emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else { // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) { // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && !(displace_is_oop) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( displace_is_oop ) {
          emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}


// Emit a 32-bit register-to-register MOV (opcode 0x8B); a no-op when
// source and destination encodings are identical.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// XMM-to-XMM copy via movdqa; empty when encodings match.
void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding
== src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    MacroAssembler _masm(&cbuf);

    __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
  }
}


//=============================================================================
const bool Matcher::constant_table_absolute_addressing = true;
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the method prolog; must mirror MachPrologNode::emit below.
void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile* C = ra_->C;
  if( C->in_24_bit_fp_mode() ) {
    st->print("FLDCW 24 bit fpu control word");
    st->print_cr(""); st->print("\t");
  }

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them. But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack. But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (C->need_stack_bang(framesize)) {
    st->print_cr("# stack bang"); st->print("\t");
  }
  st->print_cr("PUSHL EBP"); st->print("\t");

  if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
    st->print("PUSH 0xBADB100D\t# Majik cookie for stack depth check");
    st->print_cr(""); st->print("\t");
    framesize -= wordSize;
  }

  if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
    if (framesize) {
      st->print("SUB ESP,%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP,%d\t# Create frame",framesize);
  }
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  if (UseSSE >= 2 && VerifyFPU) {
    MacroAssembler masm(&cbuf);
    masm.verify_FPU(0, "FPU stack must be clean on entry");
  }

  // WARNING: Initial instruction MUST be 5 bytes or longer so that
  // NativeJump::patch_verified_entry will be able to patch out the entry
  // code safely. The fldcw is ok at 6 bytes, the push to verify stack
  // depth is ok at 5 bytes, the frame allocation can be either 3 or
  // 6 bytes. So if we don't do the fldcw or the push then we must
  // use the 6 byte frame allocation even if we have no frame. :-(
  // If method sets FPU control word do it now
  if( C->in_24_bit_fp_mode() ) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
  }

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them. But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack. But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (C->need_stack_bang(framesize)) {
    MacroAssembler masm(&cbuf);
    masm.generate_stack_overflow_check(framesize);
  }

  // We always push rbp, so that on return to interpreter rbp, will be
  // restored correctly and we can correct the stack.
  emit_opcode(cbuf, 0x50 | EBP_enc);

  if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
    emit_opcode(cbuf, 0x68); // push 0xbadb100d
    emit_d32(cbuf, 0xbadb100d);
    framesize -= wordSize;
  }

  // 8-bit immediate form (3 bytes) only when the fldcw/push above already
  // satisfied the 5-byte patch-window requirement; otherwise force the
  // 32-bit form (6 bytes).
  if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
    if (framesize) {
      emit_opcode(cbuf, 0x83); // sub SP,#framesize
      emit_rm(cbuf, 0x3, 0x05, ESP_enc);
      emit_d8(cbuf, framesize);
    }
  } else {
    emit_opcode(cbuf, 0x81); // sub SP,#framesize
    emit_rm(cbuf, 0x3, 0x05, ESP_enc);
    emit_d32(cbuf, framesize);
  }
  C->set_frame_complete(cbuf.insts_size());

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    Label L;
    MacroAssembler masm(&cbuf);
    masm.push(rax);
    masm.mov(rax, rsp);
    masm.andptr(rax, StackAlignmentInBytes-1);
    masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
    masm.pop(rax);
    masm.jcc(Assembler::equal, L);
    masm.stop("Stack is not properly aligned!");
    masm.bind(L);
  }
#endif

}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the method epilog; must mirror MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if( C->in_24_bit_fp_mode() ) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if( framesize ) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if( do_polling() && C->is_method_compilation() ) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  // If method set FPU control word, restore to standard control word
  if( C->in_24_bit_fp_mode() ) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if( framesize >= 128 ) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  }
  else if( framesize ) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // popl rbp

  if( do_polling() && C->is_method_compilation() ) {
    // Safepoint poll: TEST EAX against the polling page (absolute address).
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

// Byte size of the epilog; must track MachEpilogNode::emit exactly.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if( do_polling() && C->is_method_compilation() ) size += 6;

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if( framesize >= 128 ) {
    size += 6; // 32-bit immediate form of "add SP, #framesize"
  } else {
    size += framesize ? 3 : 0; // 8-bit immediate form, or nothing
  }
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Register classes used by the spill-copy helpers below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Shared emit/format/size helper for register<->stack-slot moves. With a
// CodeBuffer it emits code; with do_size false it prints; either way it
// returns the accumulated byte size.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int
offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 780 return size+3+offset_size; 781 } 782 783 // Helper for XMM registers. Extra opcode bits, limited syntax. 784 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, 785 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { 786 if( cbuf ) { 787 if( reg_lo+1 == reg_hi ) { // double move? 788 if( is_load && !UseXmmLoadAndClearUpper ) 789 emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load 790 else 791 emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise 792 } else { 793 emit_opcode(*cbuf, 0xF3 ); 794 } 795 emit_opcode(*cbuf, 0x0F ); 796 if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper ) 797 emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load 798 else 799 emit_opcode(*cbuf, is_load ? 0x10 : 0x11 ); 800 encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false); 801 #ifndef PRODUCT 802 } else if( !do_size ) { 803 if( size != 0 ) st->print("\n\t"); 804 if( reg_lo+1 == reg_hi ) { // double move? 805 if( is_load ) st->print("%s %s,[ESP + #%d]", 806 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", 807 Matcher::regName[reg_lo], offset); 808 else st->print("MOVSD [ESP + #%d],%s", 809 offset, Matcher::regName[reg_lo]); 810 } else { 811 if( is_load ) st->print("MOVSS %s,[ESP + #%d]", 812 Matcher::regName[reg_lo], offset); 813 else st->print("MOVSS [ESP + #%d],%s", 814 offset, Matcher::regName[reg_lo]); 815 } 816 #endif 817 } 818 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 
// XMM-to-XMM register move; an adjacent register pair (lo+1 == hi) on both
// sides denotes a double.  Returns the cumulative encoding size.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
    if( cbuf ) {
      // 0x66 prefix selects MOVAPD (double) over MOVAPS (single).
      if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
        emit_opcode(*cbuf, 0x66 );
      }
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x28 );
      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    // MOVAPD (with prefix) is 4 bytes, MOVAPS is 3.
    return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
  } else {
    if( cbuf ) {
      // MOVSD (0xF2 prefix) for doubles, MOVSS (0xF3 prefix) for singles.
      emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x10 );
      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    return size+4;
  }
}
// Move a 32-bit general-purpose register into an XMM register
// (MOVD xmm, r32: 66 0F 6E /r).  Always 4 bytes.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    emit_opcode(*cbuf, 0x66);
    emit_opcode(*cbuf, 0x0F);
    emit_opcode(*cbuf, 0x6E);
    emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4;
}


// Move an XMM register into a 32-bit general-purpose register
// (MOVD r32, xmm: 66 0F 7E /r).  Always 4 bytes.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    emit_opcode(*cbuf, 0x66);
    emit_opcode(*cbuf, 0x0F);
    emit_opcode(*cbuf, 0x7E);
    emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4;
}

// Integer register-to-register move (MOV r32, r/m32); 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 float register to an [ESP + offset] stack slot, pushing the
// value to the top of the FP stack first when it is not already there.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // st_op is not a real register use: impl_helper only feeds it through
  // Matcher::_regEncode to build the ModRM reg field, so EBX_num (encoding 3)
  // selects FSTP (/3) and EDX_num (encoding 2) selects FST (/2).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
// Workhorse behind MachSpillCopyNode::format/emit/size: with a CodeBuffer it
// emits the copy, with do_size it only accumulates the encoding size, and
// otherwise it prints the assembly to 'st'.  Dispatches on the register
// classes of the low (and, for 64-bit values, high) halves of source and
// destination.  Returns the total byte size of the copy sequence.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  // ESI_num/EAX_num are only ModRM digit selectors here: their encodings
  // (6 and 0) give the /6 of PUSH r/m (0xFF) and /0 of POP r/m (0x8F).
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      // Copy the high word first so the low-word copy cannot clobber it.
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        // Source already on top of the FP stack: a single FST suffices.
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                         st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP is 2+2 bytes, a lone FST is 2.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      // Load from the stack slot to ST(0), then pop into the target FPR.
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // FLD (3 bytes + displacement) followed by FSTP (2 bytes).
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  // Goes through a temporary 8-byte scratch area carved below ESP.
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Spill the x87 value into the scratch slot at [ESP+0] ...
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  // Any other combination is a bug in the allocator or in this method.
  Unimplemented();
}
#ifndef PRODUCT
// Debug printing: run implementation() in print-only mode.
void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  implementation( NULL, ra_, false, st );
}
#endif

// Emit the spill copy into the code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

// Encoding size in bytes, computed by implementation() without emitting.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}

//=============================================================================
#ifndef PRODUCT
void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const {
  st->print("NOP \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);   // _count bytes of padding (size() below returns _count)
}

uint MachNopNode::size(PhaseRegAlloc *) const {
  return _count;
}


//=============================================================================
#ifndef PRODUCT
// Print the stack address that a lock box occupies.
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif
// Materialize the stack address of the lock box into 'reg':
// LEA reg,[ESP+offset], with an 8- or 32-bit displacement form.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// Must match emit() above: 7 bytes with a 32-bit displacement, 4 with 8-bit.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================

// emit call stub, compiled java to interpreter
void emit_java_to_interp(CodeBuffer &cbuf ) {
  // Stub is fixed up when the corresponding call is converted from calling
  // compiled code to calling interpreted code.
  // mov rbx,0
  // jmp -1

  address mark = cbuf.insts_mark();  // get mark within main instrs section

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a stub.
  MacroAssembler _masm(&cbuf);

  address base =
  __ start_a_stub(Compile::MAX_stubs_size);
  if (base == NULL)  return;  // CodeBuffer::expand failed
  // static stub relocation stores the instruction address of the call
  __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
  // static stub relocation also tags the methodOop in the code-stream.
  __ movoop(rbx, (jobject)NULL);  // method is zapped till fixup time
  // This is recognized as unresolved by relocs/nativeInst/ic code
  __ jump(RuntimeAddress(__ pc()));

  __ end_a_stub();
  // Update current stubs pointer and restore insts_end.
}
// size of call stub, compiled java to interpreter
uint size_java_to_interp() {
  return 10;  // movl; jmp
}
// relocation entries for call stub, compiled java to interpreter
uint reloc_java_to_interp() {
  return 4;  // 3 in emit_java_to_interp + 1 in Java_Static_Call
}

//=============================================================================
#ifndef PRODUCT
// Unverified entry point: print the inline-cache check sequence.
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Emit the inline-cache check: compare EAX against the receiver's klass
// (loaded from [ECX + klass offset]) and jump to the ic-miss stub on
// mismatch, then pad with NOPs.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Byte size of the sequence above; OptoBreakpoint mode emits one fewer NOP.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}
11 : 12; 1283 } 1284 1285 1286 //============================================================================= 1287 uint size_exception_handler() { 1288 // NativeCall instruction size is the same as NativeJump. 1289 // exception handler starts out as jump and can be patched to 1290 // a call be deoptimization. (4932387) 1291 // Note that this value is also credited (in output.cpp) to 1292 // the size of the code section. 1293 return NativeJump::instruction_size; 1294 } 1295 1296 // Emit exception handler code. Stuff framesize into a register 1297 // and call a VM stub routine. 1298 int emit_exception_handler(CodeBuffer& cbuf) { 1299 1300 // Note that the code buffer's insts_mark is always relative to insts. 1301 // That's why we must use the macroassembler to generate a handler. 1302 MacroAssembler _masm(&cbuf); 1303 address base = 1304 __ start_a_stub(size_exception_handler()); 1305 if (base == NULL) return 0; // CodeBuffer::expand failed 1306 int offset = __ offset(); 1307 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1308 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1309 __ end_a_stub(); 1310 return offset; 1311 } 1312 1313 uint size_deopt_handler() { 1314 // NativeCall instruction size is the same as NativeJump. 1315 // exception handler starts out as jump and can be patched to 1316 // a call be deoptimization. (4932387) 1317 // Note that this value is also credited (in output.cpp) to 1318 // the size of the code section. 1319 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1320 } 1321 1322 // Emit deopt handler code. 1323 int emit_deopt_handler(CodeBuffer& cbuf) { 1324 1325 // Note that the code buffer's insts_mark is always relative to insts. 1326 // That's why we must use the macroassembler to generate a handler. 
1327 MacroAssembler _masm(&cbuf); 1328 address base = 1329 __ start_a_stub(size_exception_handler()); 1330 if (base == NULL) return 0; // CodeBuffer::expand failed 1331 int offset = __ offset(); 1332 InternalAddress here(__ pc()); 1333 __ pushptr(here.addr()); 1334 1335 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1336 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 1337 __ end_a_stub(); 1338 return offset; 1339 } 1340 1341 1342 const bool Matcher::match_rule_supported(int opcode) { 1343 if (!has_match_rule(opcode)) 1344 return false; 1345 1346 return true; // Per default match rules are supported. 1347 } 1348 1349 int Matcher::regnum_to_fpu_offset(int regnum) { 1350 return regnum - 32; // The FP registers are in the second chunk 1351 } 1352 1353 // This is UltraSparc specific, true just means we have fast l2f conversion 1354 const bool Matcher::convL2FSupported(void) { 1355 return true; 1356 } 1357 1358 // Vector width in bytes 1359 const uint Matcher::vector_width_in_bytes(void) { 1360 return UseSSE >= 2 ? 8 : 0; 1361 } 1362 1363 // Vector ideal reg 1364 const uint Matcher::vector_ideal_reg(void) { 1365 return Op_RegD; 1366 } 1367 1368 // Is this branch offset short enough that a short branch can be used? 1369 // 1370 // NOTE: If the platform does not provide any short branch variants, then 1371 // this method should return false for offset 0. 1372 bool Matcher::is_short_branch_offset(int rule, int offset) { 1373 // the short version of jmpConUCF2 contains multiple branches, 1374 // making the reach slightly less 1375 if (rule == jmpConUCF2_rule) 1376 return (-126 <= offset && offset <= 125); 1377 return (-128 <= offset && offset <= 127); 1378 } 1379 1380 const bool Matcher::isSimpleConstant64(jlong value) { 1381 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1382 return false; 1383 } 1384 1385 // The ecx parameter to rep stos for the ClearArray node is in dwords. 
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Narrow oops are not used on this platform; calling this is an error.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;  // x86 handles misaligned doubles directly


// Rewrite the memory operand of 'node' (the operand covering input edge
// 'idx') into its *_win95_safeOper variant, defined elsewhere in this file.
// NOTE(review): presumably the safe variants avoid addressing forms that
// would fault unrecoverably on an implicit null check — confirm against the
// operand definitions.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt     = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Walk the operands until the one owning input edge 'idx' is found.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                          // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new (C) indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}
// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if(  reg == ECX_num   || reg == EDX_num   ) return true;
  // XMM argument registers require the matching SSE level.
  if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
         (divisor == (int)divisor && divisor != min_jint);
}
// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask;  // IDIV leaves the quotient in EAX
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask;  // IDIV leaves the remainder in EDX
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();  // not used on this platform
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();  // not used on this platform
  return RegMask();
}

// Register preserved around a method-handle invoke for SP restoration.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return EBP_REG_mask;
}

// Returns true if the high 32 bits of the value is known to be zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_LoadUI2L) {
    // Unsigned int-to-long load: high word is zero by construction.
    return true;
  }
  if (opc == Op_AndL) {
    // AND with a constant whose high word is zero forces a zero high word.
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.
CONST_INTER causes an operand to generate a function which 1560 // returns the value of the constant when queried. MEMORY_INTER causes an 1561 // operand to generate four functions which return the Base Register, the 1562 // Index Register, the Scale Value, and the Offset Value of the operand when 1563 // queried. COND_INTER causes an operand to generate six functions which 1564 // return the encoding code (ie - encoding bits for the instruction) 1565 // associated with each basic boolean condition for a conditional instruction. 1566 // Instructions specify two basic values for encoding. They use the 1567 // ins_encode keyword to specify their encoding class (which must be one of 1568 // the class names specified in the encoding block), and they use the 1569 // opcode keyword to specify, in order, their primary, secondary, and 1570 // tertiary opcode. Only the opcode sections which a particular instruction 1571 // needs for encoding need to be specified. 1572 encode %{ 1573 // Build emit functions for each basic byte or larger field in the intel 1574 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1575 // code in the enc_class source block. Emit functions will live in the 1576 // main source block for now. 
  // In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix.
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // ModRM byte for a register-register form.
  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Explicit opcode followed by a register-register ModRM byte.
  enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32,0 encoded as B8+rd with a zero immediate.
  enc_class mov_r32_imm0( eRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32 ,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                            special case
    //
    // input : rax,: dividend                         min_int
    //         reg:  divisor                          -1
    //
    // output: rax,: quotient  (= rax, idiv reg)      min_int
    //         rdx:  remainder (= rax, irem reg)      0
    //
    // Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);  // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);  // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF);  // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                      // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}
  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, eRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  // Like OpcSE but also emits the register-form r/m byte carrying the
  // secondary opcode digit.
  enc_class OpcSErm (eRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}
set sign-extend bit 1673 // Check for 8-bit immediate, and set sign extend bit in opcode 1674 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1675 emit_opcode(cbuf, $primary | 0x02); } 1676 else { // If 32-bit immediate 1677 emit_opcode(cbuf, $primary); 1678 } 1679 // Emit r/m byte with secondary opcode, after primary opcode. 1680 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1681 %} 1682 1683 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1684 // Check for 8-bit immediate, and set sign extend bit in opcode 1685 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1686 $$$emit8$imm$$constant; 1687 } 1688 else { // If 32-bit immediate 1689 // Output immediate 1690 $$$emit32$imm$$constant; 1691 } 1692 %} 1693 1694 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1695 // Emit primary opcode and set sign-extend bit 1696 // Check for 8-bit immediate, and set sign extend bit in opcode 1697 int con = (int)$imm$$constant; // Throw away top bits 1698 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1699 // Emit r/m byte with secondary opcode, after primary opcode. 1700 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1701 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1702 else emit_d32(cbuf,con); 1703 %} 1704 1705 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1706 // Emit primary opcode and set sign-extend bit 1707 // Check for 8-bit immediate, and set sign extend bit in opcode 1708 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1709 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1710 // Emit r/m byte with tertiary opcode, after primary opcode. 1711 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1712 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1713 else emit_d32(cbuf,con); 1714 %} 1715 1716 enc_class Lbl (label labl) %{ // GOTO 1717 Label *l = $labl$$label; 1718 assert(l != NULL, "need Label"); 1719 emit_d32(cbuf, l ? 
(l->loc_pos() - (cbuf.insts_size()+4)) : 0); 1720 %} 1721 1722 enc_class LblShort (label labl) %{ // GOTO 1723 Label *l = $labl$$label; 1724 assert(l != NULL, "need Label"); 1725 int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0; 1726 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); 1727 emit_d8(cbuf, disp); 1728 %} 1729 1730 enc_class OpcSReg (eRegI dst) %{ // BSWAP 1731 emit_cc(cbuf, $secondary, $dst$$reg ); 1732 %} 1733 1734 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1735 int destlo = $dst$$reg; 1736 int desthi = HIGH_FROM_LOW(destlo); 1737 // bswap lo 1738 emit_opcode(cbuf, 0x0F); 1739 emit_cc(cbuf, 0xC8, destlo); 1740 // bswap hi 1741 emit_opcode(cbuf, 0x0F); 1742 emit_cc(cbuf, 0xC8, desthi); 1743 // xchg lo and hi 1744 emit_opcode(cbuf, 0x87); 1745 emit_rm(cbuf, 0x3, destlo, desthi); 1746 %} 1747 1748 enc_class RegOpc (eRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1749 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1750 %} 1751 1752 enc_class Jcc (cmpOp cop, label labl) %{ // JCC 1753 Label *l = $labl$$label; 1754 assert(l != NULL, "need Label"); 1755 $$$emit8$primary; 1756 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1757 emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0); 1758 %} 1759 1760 enc_class JccShort (cmpOp cop, label labl) %{ // JCC 1761 Label *l = $labl$$label; 1762 assert(l != NULL, "need Label"); 1763 emit_cc(cbuf, $primary, $cop$$cmpcode); 1764 int disp = l ? 
(l->loc_pos() - (cbuf.insts_size()+1)) : 0; 1765 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp"); 1766 emit_d8(cbuf, disp); 1767 %} 1768 1769 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1770 $$$emit8$primary; 1771 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1772 %} 1773 1774 enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV 1775 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1776 emit_d8(cbuf, op >> 8 ); 1777 emit_d8(cbuf, op & 255); 1778 %} 1779 1780 // emulate a CMOV with a conditional branch around a MOV 1781 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1782 // Invert sense of branch from sense of CMOV 1783 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1784 emit_d8( cbuf, $brOffs$$constant ); 1785 %} 1786 1787 enc_class enc_PartialSubtypeCheck( ) %{ 1788 Register Redi = as_Register(EDI_enc); // result register 1789 Register Reax = as_Register(EAX_enc); // super class 1790 Register Recx = as_Register(ECX_enc); // killed 1791 Register Resi = as_Register(ESI_enc); // sub class 1792 Label miss; 1793 1794 MacroAssembler _masm(&cbuf); 1795 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1796 NULL, &miss, 1797 /*set_cond_codes:*/ true); 1798 if ($primary) { 1799 __ xorptr(Redi, Redi); 1800 } 1801 __ bind(miss); 1802 %} 1803 1804 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1805 MacroAssembler masm(&cbuf); 1806 int start = masm.offset(); 1807 if (UseSSE >= 2) { 1808 if (VerifyFPU) { 1809 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1810 } 1811 } else { 1812 // External c_calling_convention expects the FPU stack to be 'clean'. 1813 // Compiled code leaves it dirty. Do cleanup now. 
1814 masm.empty_FPU_stack(); 1815 } 1816 if (sizeof_FFree_Float_Stack_All == -1) { 1817 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1818 } else { 1819 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1820 } 1821 %} 1822 1823 enc_class Verify_FPU_For_Leaf %{ 1824 if( VerifyFPU ) { 1825 MacroAssembler masm(&cbuf); 1826 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1827 } 1828 %} 1829 1830 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1831 // This is the instruction starting address for relocation info. 1832 cbuf.set_insts_mark(); 1833 $$$emit8$primary; 1834 // CALL directly to the runtime 1835 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1836 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1837 1838 if (UseSSE >= 2) { 1839 MacroAssembler _masm(&cbuf); 1840 BasicType rt = tf()->return_type(); 1841 1842 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1843 // A C runtime call where the return value is unused. In SSE2+ 1844 // mode the result needs to be removed from the FPU stack. It's 1845 // likely that this function call could be removed by the 1846 // optimizer if the C function is a pure function. 
1847 __ ffree(0); 1848 } else if (rt == T_FLOAT) { 1849 __ lea(rsp, Address(rsp, -4)); 1850 __ fstp_s(Address(rsp, 0)); 1851 __ movflt(xmm0, Address(rsp, 0)); 1852 __ lea(rsp, Address(rsp, 4)); 1853 } else if (rt == T_DOUBLE) { 1854 __ lea(rsp, Address(rsp, -8)); 1855 __ fstp_d(Address(rsp, 0)); 1856 __ movdbl(xmm0, Address(rsp, 0)); 1857 __ lea(rsp, Address(rsp, 8)); 1858 } 1859 } 1860 %} 1861 1862 1863 enc_class pre_call_FPU %{ 1864 // If method sets FPU control word restore it here 1865 debug_only(int off0 = cbuf.insts_size()); 1866 if( Compile::current()->in_24_bit_fp_mode() ) { 1867 MacroAssembler masm(&cbuf); 1868 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1869 } 1870 debug_only(int off1 = cbuf.insts_size()); 1871 assert(off1 - off0 == pre_call_FPU_size(), "correct size prediction"); 1872 %} 1873 1874 enc_class post_call_FPU %{ 1875 // If method sets FPU control word do it here also 1876 if( Compile::current()->in_24_bit_fp_mode() ) { 1877 MacroAssembler masm(&cbuf); 1878 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1879 } 1880 %} 1881 1882 enc_class preserve_SP %{ 1883 debug_only(int off0 = cbuf.insts_size()); 1884 MacroAssembler _masm(&cbuf); 1885 // RBP is preserved across all calls, even compiled calls. 1886 // Use it to preserve RSP in places where the callee might change the SP. 1887 __ movptr(rbp_mh_SP_save, rsp); 1888 debug_only(int off1 = cbuf.insts_size()); 1889 assert(off1 - off0 == preserve_SP_size(), "correct size prediction"); 1890 %} 1891 1892 enc_class restore_SP %{ 1893 MacroAssembler _masm(&cbuf); 1894 __ movptr(rsp, rbp_mh_SP_save); 1895 %} 1896 1897 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1898 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1899 // who we intended to call. 
1900 cbuf.set_insts_mark(); 1901 $$$emit8$primary; 1902 if ( !_method ) { 1903 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1904 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1905 } else if(_optimized_virtual) { 1906 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1907 opt_virtual_call_Relocation::spec(), RELOC_IMM32 ); 1908 } else { 1909 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1910 static_call_Relocation::spec(), RELOC_IMM32 ); 1911 } 1912 if( _method ) { // Emit stub for static call 1913 emit_java_to_interp(cbuf); 1914 } 1915 %} 1916 1917 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1918 // !!!!! 1919 // Generate "Mov EAX,0x00", placeholder instruction to load oop-info 1920 // emit_call_dynamic_prologue( cbuf ); 1921 cbuf.set_insts_mark(); 1922 emit_opcode(cbuf, 0xB8 + EAX_enc); // mov EAX,-1 1923 emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32); 1924 address virtual_call_oop_addr = cbuf.insts_mark(); 1925 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1926 // who we intended to call. 
1927 cbuf.set_insts_mark(); 1928 $$$emit8$primary; 1929 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1930 virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 ); 1931 %} 1932 1933 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1934 int disp = in_bytes(methodOopDesc::from_compiled_offset()); 1935 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1936 1937 // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())] 1938 cbuf.set_insts_mark(); 1939 $$$emit8$primary; 1940 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1941 emit_d8(cbuf, disp); // Displacement 1942 1943 %} 1944 1945 enc_class Xor_Reg (eRegI dst) %{ 1946 emit_opcode(cbuf, 0x33); 1947 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); 1948 %} 1949 1950 // Following encoding is no longer used, but may be restored if calling 1951 // convention changes significantly. 1952 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1953 // 1954 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1955 // // int ic_reg = Matcher::inline_cache_reg(); 1956 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1957 // // int imo_reg = Matcher::interpreter_method_oop_reg(); 1958 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1959 // 1960 // // // Interpreter expects method_oop in EBX, currently a callee-saved register, 1961 // // // so we load it immediately before the call 1962 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop 1963 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1964 // 1965 // // xor rbp,ebp 1966 // emit_opcode(cbuf, 0x33); 1967 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1968 // 1969 // // CALL to interpreter. 
1970 // cbuf.set_insts_mark(); 1971 // $$$emit8$primary; 1972 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1973 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1974 // %} 1975 1976 enc_class RegOpcImm (eRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1977 $$$emit8$primary; 1978 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1979 $$$emit8$shift$$constant; 1980 %} 1981 1982 enc_class LdImmI (eRegI dst, immI src) %{ // Load Immediate 1983 // Load immediate does not have a zero or sign extended version 1984 // for 8-bit immediates 1985 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1986 $$$emit32$src$$constant; 1987 %} 1988 1989 enc_class LdImmP (eRegI dst, immI src) %{ // Load Immediate 1990 // Load immediate does not have a zero or sign extended version 1991 // for 8-bit immediates 1992 emit_opcode(cbuf, $primary + $dst$$reg); 1993 $$$emit32$src$$constant; 1994 %} 1995 1996 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1997 // Load immediate does not have a zero or sign extended version 1998 // for 8-bit immediates 1999 int dst_enc = $dst$$reg; 2000 int src_con = $src$$constant & 0x0FFFFFFFFL; 2001 if (src_con == 0) { 2002 // xor dst, dst 2003 emit_opcode(cbuf, 0x33); 2004 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 2005 } else { 2006 emit_opcode(cbuf, $primary + dst_enc); 2007 emit_d32(cbuf, src_con); 2008 } 2009 %} 2010 2011 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 2012 // Load immediate does not have a zero or sign extended version 2013 // for 8-bit immediates 2014 int dst_enc = $dst$$reg + 2; 2015 int src_con = ((julong)($src$$constant)) >> 32; 2016 if (src_con == 0) { 2017 // xor dst, dst 2018 emit_opcode(cbuf, 0x33); 2019 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 2020 } else { 2021 emit_opcode(cbuf, $primary + dst_enc); 2022 emit_d32(cbuf, src_con); 2023 } 2024 %} 2025 2026 2027 enc_class MovI2X_reg(regX dst, eRegI src) %{ 2028 emit_opcode(cbuf, 0x66 ); // MOVD dst,src 2029 emit_opcode(cbuf, 0x0F ); 2030 emit_opcode(cbuf, 
0x6E ); 2031 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2032 %} 2033 2034 enc_class MovX2I_reg(eRegI dst, regX src) %{ 2035 emit_opcode(cbuf, 0x66 ); // MOVD dst,src 2036 emit_opcode(cbuf, 0x0F ); 2037 emit_opcode(cbuf, 0x7E ); 2038 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg); 2039 %} 2040 2041 enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{ 2042 { // MOVD $dst,$src.lo 2043 emit_opcode(cbuf,0x66); 2044 emit_opcode(cbuf,0x0F); 2045 emit_opcode(cbuf,0x6E); 2046 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2047 } 2048 { // MOVD $tmp,$src.hi 2049 emit_opcode(cbuf,0x66); 2050 emit_opcode(cbuf,0x0F); 2051 emit_opcode(cbuf,0x6E); 2052 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2053 } 2054 { // PUNPCKLDQ $dst,$tmp 2055 emit_opcode(cbuf,0x66); 2056 emit_opcode(cbuf,0x0F); 2057 emit_opcode(cbuf,0x62); 2058 emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg); 2059 } 2060 %} 2061 2062 enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{ 2063 { // MOVD $dst.lo,$src 2064 emit_opcode(cbuf,0x66); 2065 emit_opcode(cbuf,0x0F); 2066 emit_opcode(cbuf,0x7E); 2067 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg); 2068 } 2069 { // PSHUFLW $tmp,$src,0x4E (01001110b) 2070 emit_opcode(cbuf,0xF2); 2071 emit_opcode(cbuf,0x0F); 2072 emit_opcode(cbuf,0x70); 2073 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2074 emit_d8(cbuf, 0x4E); 2075 } 2076 { // MOVD $dst.hi,$tmp 2077 emit_opcode(cbuf,0x66); 2078 emit_opcode(cbuf,0x0F); 2079 emit_opcode(cbuf,0x7E); 2080 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg)); 2081 } 2082 %} 2083 2084 2085 // Encode a reg-reg copy. If it is useless, then empty encoding. 2086 enc_class enc_Copy( eRegI dst, eRegI src ) %{ 2087 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2088 %} 2089 2090 enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{ 2091 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2092 %} 2093 2094 // Encode xmm reg-reg copy. If it is useless, then empty encoding. 
2095 enc_class enc_CopyXD( RegXD dst, RegXD src ) %{ 2096 encode_CopyXD( cbuf, $dst$$reg, $src$$reg ); 2097 %} 2098 2099 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many) 2100 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2101 %} 2102 2103 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 2104 $$$emit8$primary; 2105 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2106 %} 2107 2108 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 2109 $$$emit8$secondary; 2110 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2111 %} 2112 2113 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 2114 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2115 %} 2116 2117 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 2118 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2119 %} 2120 2121 enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{ 2122 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); 2123 %} 2124 2125 enc_class Con32 (immI src) %{ // Con32(storeImmI) 2126 // Output immediate 2127 $$$emit32$src$$constant; 2128 %} 2129 2130 enc_class Con32F_as_bits(immF src) %{ // storeF_imm 2131 // Output Float immediate bits 2132 jfloat jf = $src$$constant; 2133 int jf_as_bits = jint_cast( jf ); 2134 emit_d32(cbuf, jf_as_bits); 2135 %} 2136 2137 enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm 2138 // Output Float immediate bits 2139 jfloat jf = $src$$constant; 2140 int jf_as_bits = jint_cast( jf ); 2141 emit_d32(cbuf, jf_as_bits); 2142 %} 2143 2144 enc_class Con16 (immI src) %{ // Con16(storeImmI) 2145 // Output immediate 2146 $$$emit16$src$$constant; 2147 %} 2148 2149 enc_class Con_d32(immI src) %{ 2150 emit_d32(cbuf,$src$$constant); 2151 %} 2152 2153 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2154 // Output immediate memory reference 2155 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2156 emit_d32(cbuf, 0x00); 2157 %} 2158 2159 enc_class lock_prefix( ) %{ 2160 if( os::is_MP() ) 2161 
emit_opcode(cbuf,0xF0); // [Lock] 2162 %} 2163 2164 // Cmp-xchg long value. 2165 // Note: we need to swap rbx, and rcx before and after the 2166 // cmpxchg8 instruction because the instruction uses 2167 // rcx as the high order word of the new value to store but 2168 // our register encoding uses rbx,. 2169 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ 2170 2171 // XCHG rbx,ecx 2172 emit_opcode(cbuf,0x87); 2173 emit_opcode(cbuf,0xD9); 2174 // [Lock] 2175 if( os::is_MP() ) 2176 emit_opcode(cbuf,0xF0); 2177 // CMPXCHG8 [Eptr] 2178 emit_opcode(cbuf,0x0F); 2179 emit_opcode(cbuf,0xC7); 2180 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2181 // XCHG rbx,ecx 2182 emit_opcode(cbuf,0x87); 2183 emit_opcode(cbuf,0xD9); 2184 %} 2185 2186 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ 2187 // [Lock] 2188 if( os::is_MP() ) 2189 emit_opcode(cbuf,0xF0); 2190 2191 // CMPXCHG [Eptr] 2192 emit_opcode(cbuf,0x0F); 2193 emit_opcode(cbuf,0xB1); 2194 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2195 %} 2196 2197 enc_class enc_flags_ne_to_boolean( iRegI res ) %{ 2198 int res_encoding = $res$$reg; 2199 2200 // MOV res,0 2201 emit_opcode( cbuf, 0xB8 + res_encoding); 2202 emit_d32( cbuf, 0 ); 2203 // JNE,s fail 2204 emit_opcode(cbuf,0x75); 2205 emit_d8(cbuf, 5 ); 2206 // MOV res,1 2207 emit_opcode( cbuf, 0xB8 + res_encoding); 2208 emit_d32( cbuf, 1 ); 2209 // fail: 2210 %} 2211 2212 enc_class set_instruction_start( ) %{ 2213 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2214 %} 2215 2216 enc_class RegMem (eRegI ereg, memory mem) %{ // emit_reg_mem 2217 int reg_encoding = $ereg$$reg; 2218 int base = $mem$$base; 2219 int index = $mem$$index; 2220 int scale = $mem$$scale; 2221 int displace = $mem$$disp; 2222 bool disp_is_oop = $mem->disp_is_oop(); 2223 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop); 2224 %} 2225 2226 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem 2227 int reg_encoding = HIGH_FROM_LOW($ereg$$reg); // Hi register of pair, 
computed from lo 2228 int base = $mem$$base; 2229 int index = $mem$$index; 2230 int scale = $mem$$scale; 2231 int displace = $mem$$disp + 4; // Offset is 4 further in memory 2232 assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" ); 2233 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/); 2234 %} 2235 2236 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ 2237 int r1, r2; 2238 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 2239 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 2240 emit_opcode(cbuf,0x0F); 2241 emit_opcode(cbuf,$tertiary); 2242 emit_rm(cbuf, 0x3, r1, r2); 2243 emit_d8(cbuf,$cnt$$constant); 2244 emit_d8(cbuf,$primary); 2245 emit_rm(cbuf, 0x3, $secondary, r1); 2246 emit_d8(cbuf,$cnt$$constant); 2247 %} 2248 2249 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ 2250 emit_opcode( cbuf, 0x8B ); // Move 2251 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2252 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2253 emit_d8(cbuf,$primary); 2254 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 2255 emit_d8(cbuf,$cnt$$constant-32); 2256 } 2257 emit_d8(cbuf,$primary); 2258 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg)); 2259 emit_d8(cbuf,31); 2260 %} 2261 2262 enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{ 2263 int r1, r2; 2264 if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 2265 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 2266 2267 emit_opcode( cbuf, 0x8B ); // Move r1,r2 2268 emit_rm(cbuf, 0x3, r1, r2); 2269 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2270 emit_opcode(cbuf,$primary); 2271 emit_rm(cbuf, 0x3, $secondary, r1); 2272 emit_d8(cbuf,$cnt$$constant-32); 2273 } 2274 emit_opcode(cbuf,0x33); // XOR r2,r2 2275 emit_rm(cbuf, 0x3, r2, r2); 2276 %} 2277 2278 // Clone of RegMem but accepts an extra parameter to access each 2279 // half of a double in memory; it never needs 
relocation info. 2280 enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{ 2281 emit_opcode(cbuf,$opcode$$constant); 2282 int reg_encoding = $rm_reg$$reg; 2283 int base = $mem$$base; 2284 int index = $mem$$index; 2285 int scale = $mem$$scale; 2286 int displace = $mem$$disp + $disp_for_half$$constant; 2287 bool disp_is_oop = false; 2288 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop); 2289 %} 2290 2291 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!! 2292 // 2293 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant 2294 // and it never needs relocation information. 2295 // Frequently used to move data between FPU's Stack Top and memory. 2296 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ 2297 int rm_byte_opcode = $rm_opcode$$constant; 2298 int base = $mem$$base; 2299 int index = $mem$$index; 2300 int scale = $mem$$scale; 2301 int displace = $mem$$disp; 2302 assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" ); 2303 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false); 2304 %} 2305 2306 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ 2307 int rm_byte_opcode = $rm_opcode$$constant; 2308 int base = $mem$$base; 2309 int index = $mem$$index; 2310 int scale = $mem$$scale; 2311 int displace = $mem$$disp; 2312 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 2313 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop); 2314 %} 2315 2316 enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{ // emit_reg_lea 2317 int reg_encoding = $dst$$reg; 2318 int base = $src0$$reg; // 0xFFFFFFFF indicates no base 2319 int index = 0x04; // 0x04 indicates no index 2320 int scale = 0x00; // 0x00 indicates no scale 2321 int displace = $src1$$constant; // 0x00 indicates no displacement 2322 bool disp_is_oop = false; 2323 encode_RegMem(cbuf, 
reg_encoding, base, index, scale, displace, disp_is_oop); 2324 %} 2325 2326 enc_class min_enc (eRegI dst, eRegI src) %{ // MIN 2327 // Compare dst,src 2328 emit_opcode(cbuf,0x3B); 2329 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2330 // jmp dst < src around move 2331 emit_opcode(cbuf,0x7C); 2332 emit_d8(cbuf,2); 2333 // move dst,src 2334 emit_opcode(cbuf,0x8B); 2335 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2336 %} 2337 2338 enc_class max_enc (eRegI dst, eRegI src) %{ // MAX 2339 // Compare dst,src 2340 emit_opcode(cbuf,0x3B); 2341 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2342 // jmp dst > src around move 2343 emit_opcode(cbuf,0x7F); 2344 emit_d8(cbuf,2); 2345 // move dst,src 2346 emit_opcode(cbuf,0x8B); 2347 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2348 %} 2349 2350 enc_class enc_FP_store(memory mem, regD src) %{ 2351 // If src is FPR1, we can just FST to store it. 2352 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 2353 int reg_encoding = 0x2; // Just store 2354 int base = $mem$$base; 2355 int index = $mem$$index; 2356 int scale = $mem$$scale; 2357 int displace = $mem$$disp; 2358 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 2359 if( $src$$reg != FPR1L_enc ) { 2360 reg_encoding = 0x3; // Store & pop 2361 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it) 2362 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2363 } 2364 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2365 emit_opcode(cbuf,$primary); 2366 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop); 2367 %} 2368 2369 enc_class neg_reg(eRegI dst) %{ 2370 // NEG $dst 2371 emit_opcode(cbuf,0xF7); 2372 emit_rm(cbuf, 0x3, 0x03, $dst$$reg ); 2373 %} 2374 2375 enc_class setLT_reg(eCXRegI dst) %{ 2376 // SETLT $dst 2377 emit_opcode(cbuf,0x0F); 2378 emit_opcode(cbuf,0x9C); 2379 emit_rm( cbuf, 0x3, 0x4, $dst$$reg ); 2380 %} 2381 2382 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT 2383 
int tmpReg = $tmp$$reg; 2384 2385 // SUB $p,$q 2386 emit_opcode(cbuf,0x2B); 2387 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg); 2388 // SBB $tmp,$tmp 2389 emit_opcode(cbuf,0x1B); 2390 emit_rm(cbuf, 0x3, tmpReg, tmpReg); 2391 // AND $tmp,$y 2392 emit_opcode(cbuf,0x23); 2393 emit_rm(cbuf, 0x3, tmpReg, $y$$reg); 2394 // ADD $p,$tmp 2395 emit_opcode(cbuf,0x03); 2396 emit_rm(cbuf, 0x3, $p$$reg, tmpReg); 2397 %} 2398 2399 enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{ // cadd_cmpLT 2400 int tmpReg = $tmp$$reg; 2401 2402 // SUB $p,$q 2403 emit_opcode(cbuf,0x2B); 2404 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg); 2405 // SBB $tmp,$tmp 2406 emit_opcode(cbuf,0x1B); 2407 emit_rm(cbuf, 0x3, tmpReg, tmpReg); 2408 // AND $tmp,$y 2409 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2410 emit_opcode(cbuf,0x23); 2411 int reg_encoding = tmpReg; 2412 int base = $mem$$base; 2413 int index = $mem$$index; 2414 int scale = $mem$$scale; 2415 int displace = $mem$$disp; 2416 bool disp_is_oop = $mem->disp_is_oop(); 2417 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop); 2418 // ADD $p,$tmp 2419 emit_opcode(cbuf,0x03); 2420 emit_rm(cbuf, 0x3, $p$$reg, tmpReg); 2421 %} 2422 2423 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ 2424 // TEST shift,32 2425 emit_opcode(cbuf,0xF7); 2426 emit_rm(cbuf, 0x3, 0, ECX_enc); 2427 emit_d32(cbuf,0x20); 2428 // JEQ,s small 2429 emit_opcode(cbuf, 0x74); 2430 emit_d8(cbuf, 0x04); 2431 // MOV $dst.hi,$dst.lo 2432 emit_opcode( cbuf, 0x8B ); 2433 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2434 // CLR $dst.lo 2435 emit_opcode(cbuf, 0x33); 2436 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); 2437 // small: 2438 // SHLD $dst.hi,$dst.lo,$shift 2439 emit_opcode(cbuf,0x0F); 2440 emit_opcode(cbuf,0xA5); 2441 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2442 // SHL $dst.lo,$shift" 2443 emit_opcode(cbuf,0xD3); 2444 emit_rm(cbuf, 0x3, 0x4, $dst$$reg ); 2445 %} 2446 2447 enc_class 
shift_right_long( eRegL dst, eCXRegI shift ) %{ 2448 // TEST shift,32 2449 emit_opcode(cbuf,0xF7); 2450 emit_rm(cbuf, 0x3, 0, ECX_enc); 2451 emit_d32(cbuf,0x20); 2452 // JEQ,s small 2453 emit_opcode(cbuf, 0x74); 2454 emit_d8(cbuf, 0x04); 2455 // MOV $dst.lo,$dst.hi 2456 emit_opcode( cbuf, 0x8B ); 2457 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2458 // CLR $dst.hi 2459 emit_opcode(cbuf, 0x33); 2460 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg)); 2461 // small: 2462 // SHRD $dst.lo,$dst.hi,$shift 2463 emit_opcode(cbuf,0x0F); 2464 emit_opcode(cbuf,0xAD); 2465 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2466 // SHR $dst.hi,$shift" 2467 emit_opcode(cbuf,0xD3); 2468 emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) ); 2469 %} 2470 2471 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ 2472 // TEST shift,32 2473 emit_opcode(cbuf,0xF7); 2474 emit_rm(cbuf, 0x3, 0, ECX_enc); 2475 emit_d32(cbuf,0x20); 2476 // JEQ,s small 2477 emit_opcode(cbuf, 0x74); 2478 emit_d8(cbuf, 0x05); 2479 // MOV $dst.lo,$dst.hi 2480 emit_opcode( cbuf, 0x8B ); 2481 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2482 // SAR $dst.hi,31 2483 emit_opcode(cbuf, 0xC1); 2484 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) ); 2485 emit_d8(cbuf, 0x1F ); 2486 // small: 2487 // SHRD $dst.lo,$dst.hi,$shift 2488 emit_opcode(cbuf,0x0F); 2489 emit_opcode(cbuf,0xAD); 2490 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2491 // SAR $dst.hi,$shift" 2492 emit_opcode(cbuf,0xD3); 2493 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) ); 2494 %} 2495 2496 2497 // ----------------- Encodings for floating point unit ----------------- 2498 // May leave result in FPU-TOS or FPU reg depending on opcodes 2499 enc_class OpcReg_F (regF src) %{ // FMUL, FDIV 2500 $$$emit8$primary; 2501 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 2502 %} 2503 2504 // Pop argument in FPR0 with FSTP ST(0) 2505 enc_class PopFPU() %{ 2506 emit_opcode( cbuf, 0xDD ); 2507 
emit_d8( cbuf, 0xD8 ); 2508 %} 2509 2510 // !!!!! equivalent to Pop_Reg_F 2511 enc_class Pop_Reg_D( regD dst ) %{ 2512 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2513 emit_d8( cbuf, 0xD8+$dst$$reg ); 2514 %} 2515 2516 enc_class Push_Reg_D( regD dst ) %{ 2517 emit_opcode( cbuf, 0xD9 ); 2518 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2519 %} 2520 2521 enc_class strictfp_bias1( regD dst ) %{ 2522 emit_opcode( cbuf, 0xDB ); // FLD m80real 2523 emit_opcode( cbuf, 0x2D ); 2524 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); 2525 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2526 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2527 %} 2528 2529 enc_class strictfp_bias2( regD dst ) %{ 2530 emit_opcode( cbuf, 0xDB ); // FLD m80real 2531 emit_opcode( cbuf, 0x2D ); 2532 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); 2533 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2534 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2535 %} 2536 2537 // Special case for moving an integer register to a stack slot. 2538 enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS 2539 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp ); 2540 %} 2541 2542 // Special case for moving a register to a stack slot. 
2543 enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS 2544 // Opcode already emitted 2545 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2546 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2547 emit_d32(cbuf, $dst$$disp); // Displacement 2548 %} 2549 2550 // Push the integer in stackSlot 'src' onto FP-stack 2551 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2552 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2553 %} 2554 2555 // Push the float in stackSlot 'src' onto FP-stack 2556 enc_class Push_Mem_F( memory src ) %{ // FLD_S [ESP+src] 2557 store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp ); 2558 %} 2559 2560 // Push the double in stackSlot 'src' onto FP-stack 2561 enc_class Push_Mem_D( memory src ) %{ // FLD_D [ESP+src] 2562 store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp ); 2563 %} 2564 2565 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2566 enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2567 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2568 %} 2569 2570 // Same as Pop_Mem_F except for opcode 2571 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2572 enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2573 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2574 %} 2575 2576 enc_class Pop_Reg_F( regF dst ) %{ 2577 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2578 emit_d8( cbuf, 0xD8+$dst$$reg ); 2579 %} 2580 2581 enc_class Push_Reg_F( regF dst ) %{ 2582 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2583 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2584 %} 2585 2586 // Push FPU's float to a stack-slot, and pop FPU-stack 2587 enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{ 2588 int pop = 0x02; 2589 if ($src$$reg != FPR1L_enc) { 2590 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2591 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2592 pop = 0x03; 2593 } 2594 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2595 %} 2596 2597 // Push FPU's double to a stack-slot, and pop 
  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP (pop) instead of FST, since we loaded a copy
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;                        // FSTP ST(i) form, pops the copy we just loaded
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}

  // dst = src + src1*src2, all on the x87 stack; result left popped into dst.
  enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
    MacroAssembler masm(&cbuf);
    masm.fld_s( $src1$$reg-1);   // nothing at TOS, load TOS from src1.reg
    masm.fmul( $src2$$reg+0);   // value at TOS
    masm.fadd( $src$$reg+0);    // value at TOS
    masm.fstp_d( $dst$$reg+0);  // value at TOS, popped off after store
  %}

  // Load dst into FPR0 (TOS); if src is not already FPR1, rotate it there
  // via fincstp/FXCH/fdecstp so a following binary op sees dst op src.
  enc_class Push_Reg_Mod_D( regD dst, regD src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Move two XMM doubles onto the x87 stack (src1 then src0) by bouncing
  // each through an 8-byte stack temp: MOVSD [ESP],src then FLD_D [ESP].
  enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
    // Allocate a word
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], src1
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], src0
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}

  // Single-precision variant of Push_ModD_encoding: MOVSS + FLD through a
  // 4-byte stack temp, src1 first then src0.
  enc_class Push_ModX_encoding( regX src0, regX src1) %{
    // Allocate a word
    emit_opcode(cbuf,0x83);     // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);

    emit_opcode  (cbuf, 0xF3 ); // MOVSS [ESP], src1
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );    // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode  (cbuf, 0xF3 ); // MOVSS [ESP], src0
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );    // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}

  // Pop the x87 TOS (a double) into XMM dst through the stack temp, then
  // release the 8-byte temp allocated by push_stack_temp_qword.
  enc_class Push_ResultXD(regXD dst) %{
    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]

    // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);     // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x08);
  %}

  // Pop the x87 TOS (a float) into XMM dst; d8 is the temp size to release
  // (4 or 8 bytes, matching whichever push allocated it).
  enc_class Push_ResultX(regX dst, immI d8) %{
    store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]

    emit_opcode  (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x10 );
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);     // ADD ESP,d8 (4 or 8)
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,$d8$$constant);
  %}

  // Allocate an 8-byte temp and push XMM src onto the x87 stack through it.
  enc_class Push_SrcXD(regXD src) %{
    // Allocate a word
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}

  // Reserve an 8-byte scratch slot on the stack.
  enc_class push_stack_temp_qword() %{
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8    (cbuf,0x08);
  %}

  // Release the 8-byte scratch slot.
  enc_class pop_stack_temp_qword() %{
    emit_opcode(cbuf,0x83);     // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8    (cbuf,0x08);
  %}

  // Copy xmm_src onto the x87 stack (becomes FPR1 relative to a prior TOS)
  // via the existing [ESP] scratch slot; the slot must already be allocated.
  enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
    emit_opcode  (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );    // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}

  // Compute X^Y using Intel's fast hardware instructions, if possible.
  // Otherwise return a NaN.
  enc_class pow_exp_core_encoding %{
    // FPR1 holds Y*ln2(X).  Compute FPR1 = 2^(Y*ln2(X))
    // Split Q = Y*ln2(X) into int(Q) and frac(Q); 2^Q = 2^int(Q) * 2^frac(Q).
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0);  // fdup = fld st(0)          Q       Q
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC);  // frndint               int(Q)      Q
    emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9);  // fsub st(1) -= st(0);  int(Q) frac(Q)
    emit_opcode(cbuf,0xDB);                          // FISTP [ESP]           frac(Q)
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0);  // f2xm1                 2^frac(Q)-1
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8);  // fld1                  1 2^frac(Q)-1
    emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1);  // faddp                 2^frac(Q)
    // Build 2^int(Q) by hand as an IEEE double in the [ESP] scratch slot:
    // biased exponent goes into the high word, mantissa/low word is zero.
    emit_opcode(cbuf,0x8B);                          // mov rax,[esp+0]=int(Q)
    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
    emit_opcode(cbuf,0xC7);                          // mov rcx,0xFFFFF800 - overflow mask
    emit_rm(cbuf, 0x3, 0x0, ECX_enc);
    emit_d32(cbuf,0xFFFFF800);
    emit_opcode(cbuf,0x81);                          // add rax,1023 - the double exponent bias
    emit_rm(cbuf, 0x3, 0x0, EAX_enc);
    emit_d32(cbuf,1023);
    emit_opcode(cbuf,0x8B);                          // mov rbx,eax
    emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
    emit_opcode(cbuf,0xC1);                          // shl rax,20 - Slide to exponent position
    emit_rm(cbuf,0x3,0x4,EAX_enc);
    emit_d8(cbuf,20);
    emit_opcode(cbuf,0x85);                          // test rbx,ecx - check for overflow
    emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
    emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45);  // CMOVne rax,ecx - overflow; stuff NAN into EAX
    emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
    emit_opcode(cbuf,0x89);                          // mov [esp+4],eax - Store as part of double word
    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
    emit_opcode(cbuf,0xC7);                          // mov [esp+0],0   - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
    emit_d32(cbuf,0);
    emit_opcode(cbuf,0xDC);                          // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
    encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
  %}

  // enc_class Pop_Reg_Mod_D( regD dst, regD src)
  // was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
  // enc_class Pop_Reg_Mod_D( regD dst, regD src)
  // was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()

  // If src is not FPR1, rotate it into FPR1 (fincstp/FXCH/fdecstp) so the
  // result of the preceding modulo sequence can be popped by Pop_Reg_* /
  // Pop_Mem_*.
  enc_class Push_Result_Mod_D( regD src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy FPU status to EFLAGS and skip the next 5 bytes unless the parity
  // flag (unordered compare) is set.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  // Iterative FPREM loop: repeat until the FPU C2 flag reports the partial
  // remainder is complete.  The JP displacement (-12) branches back to the
  // FPREM at the top of this sequence.
  enc_class emitModD() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp  ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // Transfer x87 compare results into EFLAGS, forcing the LT outcome (carry
  // set) when the compare was unordered (C2 / bit 0x0400 set).
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  // Pseudo-code for CmpF_Result below:
  //     fnstsw_ax();
  //     sahf();
  //     movl(dst, nan_result);
  //     jcc(Assembler::parity, exit);
  //     movl(dst, less_result);
  //     jcc(Assembler::below, exit);
  //     movl(dst, equal_result);
  //     jcc(Assembler::equal, exit);
  //     movl(dst, greater_result);

  // Values actually emitted below (Java fcmpl-style):
  // less_result     = -1;
  // greater_result  =  1;
  // equal_result    =  0;
  // nan_result      = -1;

  // Materialize a three-way float compare result in dst: each MOV imm32 is
  // followed by a short Jcc to the common exit; the displacements (0x13,
  // 0x0C, 0x05) skip the remaining MOV/Jcc pairs.
  enc_class CmpF_Result(eRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // XMM version of CmpF_Result.
  // XMM version of CmpF_Result.  Because the XMM compare
  // instructions set the EFLAGS directly.  It becomes simpler than
  // the float version above.
  enc_class CmpX_Result(eRegI dst) %{
    MacroAssembler _masm(&cbuf);
    Label nan, inc, done;

    __ jccb(Assembler::parity, nan);
    __ jccb(Assembler::equal,  done);
    __ jccb(Assembler::above,  inc);
    __ bind(nan);
    __ decrement(as_Register($dst$$reg)); // NO L qqq
    __ jmpb(done);
    __ bind(inc);
    __ increment(as_Register($dst$$reg)); // NO L qqq
    __ bind(done);
  %}

  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend a 32-bit int into a long register pair (lo = src, hi = src>>31).
  enc_class convert_int_long( regL dst, eRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  // Push the long register pair and FILD it onto the x87 stack, then
  // restore ESP.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add  SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  // IMUL into EDX:EAX, then arithmetic-shift the high half right by
  // (cnt-32); a zero shift count is skipped entirely.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL   EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR    EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}

  enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}

  // Long division: push both operands and call SharedRuntime::ldiv, then
  // pop the four argument words.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Long remainder: same calling sequence as long_div, but targets
  // SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Set ZF iff the whole long is zero: tmp = src.lo | src.hi.
  enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
    // MOV $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  // Long equality compare: compare low halves, skip the high compare when
  // the lows already differ.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}

  // Signed long compare via CMP low / SBB high; tmp is clobbered.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB    $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}

  // Compare a long against zero: flags for (0 - src) via XOR/CMP/SBB.
  enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}

  // Sniff, sniff... smells like Gnu Superoptimizer
  // Two's-complement negate of a long register pair: NEG hi; NEG lo;
  // SBB hi,0 propagates the borrow from the low half.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  enc_class movq_ld(regXD dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}

  enc_class movq_st(memory mem, regXD src) %{
    MacroAssembler _masm(&cbuf);
    __ movq($mem$$Address, $src$$XMMRegister);
  %}

  // Replicate the low byte of src across 8 byte lanes of dst.
  enc_class pshufd_8x8(regX dst, regX src) %{
    MacroAssembler _masm(&cbuf);

    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
  %}

  // Replicate the low 16-bit word of src across 4 word lanes of dst.
  enc_class pshufd_4x16(regX dst, regX src) %{
    MacroAssembler _masm(&cbuf);

    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
  %}

  enc_class pshufd(regXD dst, regXD src, int mode) %{
    MacroAssembler _masm(&cbuf);

    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
  %}

  enc_class pxor(regXD dst, regXD src) %{
    MacroAssembler _masm(&cbuf);

    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
  %}

  enc_class mov_i2x(regXD dst, eRegI src) %{
    MacroAssembler _masm(&cbuf);

    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
  %}


  // Because the transitions from emitted code to the runtime
  // monitorenter/exit helper stubs are so slow it's critical that
  // we inline both the stack-locking fast-path and the inflated fast path.
  //
  // See also: cmpFastLock and cmpFastUnlock.
  //
  // What follows is a specialized inline transliteration of the code
  // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
  // another option would be to emit TrySlowEnter and TrySlowExit methods
  // at startup-time.  These methods would accept arguments as
  // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
  // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
  // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
  // In practice, however, the # of lock sites is bounded and is usually small.
  // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
  // if the processor uses simple bimodal branch predictors keyed by EIP
  // Since the helper routines would be called from multiple synchronization
  // sites.
  //
  // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
  // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
  // to those specialized methods.  That'd give us a mostly platform-independent
  // implementation that the JITs could optimize and inline at their pleasure.
  // Done correctly, the only time we'd need to cross to native could would be
  // to park() or unpark() threads.
  // We'd also need a few more unsafe operators
  // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
  // (b) explicit barriers or fence operations.
  //
  // TODO:
  //
  // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
  //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
  //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
  //    the lock operators would typically be faster than reifying Self.
  //
  // *  Ideally I'd define the primitives as:
  //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
  //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
  //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
  //    Instead, we're stuck with a rather awkward and brittle register assignments below.
  //    Furthermore the register assignments are overconstrained, possibly resulting in
  //    sub-optimal code near the synchronization site.
  //
  // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
  //    Alternately, use a better sp-proximity test.
  //
  // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
  //    Either one is sufficient to uniquely identify a thread.
  //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
  //
  // *  Intrinsify notify() and notifyAll() for the common cases where the
  //    object is locked by the calling thread but the waitlist is empty.
  //    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
  //
  // *  use jccb and jmpb instead of jcc and jmp to improve code density.
  //    But beware of excessive branch density on AMD Opterons.
  //
  // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
  //    or failure of the fast-path.  If the fast-path fails then we pass
  //    control to the slow-path, typically in C.  In Fast_Lock and
  //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
  //    will emit a conditional branch immediately after the node.
  //    So we have branches to branches and lots of ICC.ZF games.
  //    Instead, it might be better to have C2 pass a "FailureLabel"
  //    into Fast_Lock and Fast_Unlock.  In the case of success, control
  //    will drop through the node.  ICC.ZF is undefined at exit.
  //    In the case of failure, the node will branch directly to the
  //    FailureLabel


  // obj: object to lock
  // box: on-stack box address (displaced header location) - KILLED
  // rax,: tmp -- KILLED
  // scr: tmp -- KILLED
  enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{

    Register objReg = as_Register($obj$$reg);
    Register boxReg = as_Register($box$$reg);
    Register tmpReg = as_Register($tmp$$reg);
    Register scrReg = as_Register($scr$$reg);

    // Ensure the register assignments are disjoint
    guarantee (objReg != boxReg, "") ;
    guarantee (objReg != tmpReg, "") ;
    guarantee (objReg != scrReg, "") ;
    guarantee (boxReg != tmpReg, "") ;
    guarantee (boxReg != scrReg, "") ;
    guarantee (tmpReg == as_Register(EAX_enc), "") ;

    MacroAssembler masm(&cbuf);

    if (_counters != NULL) {
      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
    }
    if (EmitSync & 1) {
        // EmitSync bit 0: disable the fast path entirely.
        // set box->dhw = unused_mark (3)
        // Force all sync thru slow-path: slow_enter() and slow_exit()
        masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
        masm.cmpptr (rsp, (int32_t)0) ;
    } else
    if (EmitSync & 2) {
        // EmitSync bit 1: stack-locking only, no inflated fast path.
        Label DONE_LABEL ;
        if (UseBiasedLocking) {
           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
        }

        masm.movptr(tmpReg, Address(objReg, 0)) ;          // fetch markword
        masm.orptr (tmpReg, 0x1);
        masm.movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
        if (os::is_MP()) { masm.lock();  }
        masm.cmpxchgptr(boxReg, Address(objReg, 0));       // Updates tmpReg
        masm.jcc(Assembler::equal, DONE_LABEL);
        // Recursive locking
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
        masm.movptr(Address(boxReg, 0), tmpReg);
        masm.bind(DONE_LABEL) ;
    } else {
      // Possible cases that we'll encounter in fast_lock
      // ------------------------------------------------
      // * Inflated
      //    -- unlocked
      //    -- Locked
      //       = by self
      //       = by other
      // * biased
      //    -- by Self
      //    -- by other
      // * neutral
      // * stack-locked
      //    -- by self
      //       = sp-proximity test hits
      //       = sp-proximity test generates false-negative
      //    -- by other
      //

      Label IsInflated, DONE_LABEL, PopDone ;

      // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
      // order to reduce the number of conditional branches in the most common cases.
      // Beware -- there's a subtle invariant that fetch of the markword
      // at [FETCH], below, will never observe a biased encoding (*101b).
      // If this invariant is not held we risk exclusion (safety) failure.
      if (UseBiasedLocking && !UseOptoBiasInlining) {
        masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
      }

      masm.movptr(tmpReg, Address(objReg, 0)) ;         // [FETCH]
      masm.testptr(tmpReg, 0x02) ;                      // Inflated v (Stack-locked or neutral)
      masm.jccb  (Assembler::notZero, IsInflated) ;

      // Attempt stack-locking ...
      masm.orptr (tmpReg, 0x1);
      masm.movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
      if (os::is_MP()) { masm.lock();  }
      masm.cmpxchgptr(boxReg, Address(objReg, 0));      // Updates tmpReg
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jccb (Assembler::equal, DONE_LABEL);

      // Recursive locking
      masm.subptr(tmpReg, rsp);
      masm.andptr(tmpReg, 0xFFFFF003 );
      masm.movptr(Address(boxReg, 0), tmpReg);
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jmp  (DONE_LABEL) ;

      masm.bind (IsInflated) ;

      // The object is inflated.
      //
      // TODO-FIXME: eliminate the ugly use of manifest constants:
      //   Use markOopDesc::monitor_value instead of "2".
      //   use markOop::unused_mark() instead of "3".
      // The tmpReg value is an objectMonitor reference ORed with
      // markOopDesc::monitor_value (2).  We can either convert tmpReg to an
      // objectmonitor pointer by masking off the "2" bit or we can just
      // use tmpReg as an objectmonitor pointer but bias the objectmonitor
      // field offsets with "-2" to compensate for and annul the low-order tag bit.
      //
      // I use the latter as it avoids AGI stalls.
      // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
      // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
      //
      #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)

      // boxReg refers to the on-stack BasicLock in the current frame.
      // We'd like to write:
      //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
      // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
      // additional latency as we have another ST in the store buffer that must drain.

      if (EmitSync & 8192) {
         masm.movptr(Address(boxReg, 0), 3) ;            // results in ST-before-CAS penalty
         masm.get_thread (scrReg) ;
         masm.movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
         masm.movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      } else
      if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
         masm.movptr(scrReg, boxReg) ;
         masm.movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form: consider XORL tmpReg,tmpReg
           masm.movptr(tmpReg, NULL_WORD) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           // Test-And-CAS instead of CAS
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Ideally, I'd manifest "Self" with get_thread and then attempt
         // to CAS the register containing Self into m->Owner.
         // But we don't have enough registers, so instead we can either try to CAS
         // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
         // we later store "Self" into m->Owner.  Transiently storing a stack address
         // (rsp or the address of the box) into  m->owner is harmless.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         if (os::is_MP()) { masm.lock();  }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
         masm.movptr(Address(scrReg, 0), 3) ;          // box->_displaced_header = 3
         masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         masm.get_thread (scrReg) ;                    // beware: clobbers ICCs
         masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
         masm.xorptr(boxReg, boxReg) ;                 // set icc.ZFlag = 1 to indicate success

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      } else {
         masm.movptr(Address(boxReg, 0), 3) ;       // results in ST-before-CAS penalty
         masm.movptr(boxReg, tmpReg) ;

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form
           masm.xorptr  (tmpReg, tmpReg) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Use either "Self" (in scr) or rsp as thread identity in _owner.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         masm.get_thread (scrReg) ;
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      }

      // DONE_LABEL is a hot target - we'd really like to place it at the
      // start of cache line by padding with NOPs.
      // See the AMD and Intel software optimization manuals for the
      // most efficient "long" NOP encodings.
      // Unfortunately none of our alignment mechanisms suffice.
      masm.bind(DONE_LABEL);

      // Avoid branch-to-branch on AMD processors
      // This appears to be superstition.
      if (EmitSync & 32) masm.nop() ;


      // At DONE_LABEL the icc ZFlag is set as follows ...
      // Fast_Unlock uses the same protocol.
      // ZFlag == 1 -> Success
      // ZFlag == 0 -> Failure - force control through the slow-path
    }
  %}

  // obj: object to unlock
  // box: box address (displaced header location), killed.  Must be EAX.
  // rbx,: killed tmp; cannot be obj nor box.
  //
  // Some commentary on balanced locking:
  //
  // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
  // Methods that don't have provably balanced locking are forced to run in the
  // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
  // The interpreter provides two properties:
  // I1:  At return-time the interpreter automatically and quietly unlocks any
  //      objects acquired the current activation (frame).
//      Recall that the
//      interpreter maintains an on-stack list of locks currently held by
//      a frame.
// I2:  If a method attempts to unlock an object that is not held by the
//      frame the interpreter throws IMSX.
//
// Lets say A(), which has provably balanced locking, acquires O and then calls B().
// B() doesn't have provably balanced locking so it runs in the interpreter.
// Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
// is still locked by A().
//
// The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
// should not be unlocked by "normal" java-level locking and vice-versa.  The specification
// doesn't specify what will occur if a program engages in such mixed-mode locking, however.

  // Emit the fast-path monitor exit for a provably balanced unlock site.
  //   obj: object to unlock; cannot be EAX (nabxRegP).
  //   box: on-stack box address (displaced header location); must be EAX, killed.
  //   tmp: scratch register, killed; cannot be obj nor box.
  // On exit icc.ZF == 1 indicates success, ZF == 0 forces control through
  // the slow path (same flag protocol as Fast_Lock).
  enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{

    Register objReg = as_Register($obj$$reg);
    Register boxReg = as_Register($box$$reg);
    Register tmpReg = as_Register($tmp$$reg);

    // The three registers must be pairwise distinct, and box must be EAX
    // because CMPXCHG implicitly uses EAX as its comparand.
    guarantee (objReg != boxReg, "") ;
    guarantee (objReg != tmpReg, "") ;
    guarantee (boxReg != tmpReg, "") ;
    guarantee (boxReg == as_Register(EAX_enc), "") ;
    MacroAssembler masm(&cbuf);

    if (EmitSync & 4) {
      // Disable - inhibit all inlining.  Force control through the slow-path.
      // CMP ESP,0 always clears ZF (ESP is never 0), so callers always
      // take the slow path.
      masm.cmpptr (rsp, 0) ;
    } else
    if (EmitSync & 8) {
      Label DONE_LABEL ;
      if (UseBiasedLocking) {
         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
      }
      // classic stack-locking code ...
      masm.movptr(tmpReg, Address(boxReg, 0)) ;       // fetch displaced header
      masm.testptr(tmpReg, tmpReg) ;                  // 0 => recursive unlock; done
      masm.jcc   (Assembler::zero, DONE_LABEL) ;
      if (os::is_MP()) { masm.lock(); }
      masm.cmpxchgptr(tmpReg, Address(objReg, 0));    // Uses EAX which is box
      masm.bind(DONE_LABEL);
    } else {
      Label DONE_LABEL, Stacked, CheckSucc, Inflated ;

      // Critically, the biased locking test must have precedence over
      // and appear before the (box->dhw == 0) recursive stack-lock test.
      if (UseBiasedLocking && !UseOptoBiasInlining) {
         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
      }

      // Note: the MOV below does not modify EFLAGS, so the JCC still tests
      // the result of the CMP against the displaced header.
      masm.cmpptr(Address(boxReg, 0), 0) ;            // Examine the displaced header
      masm.movptr(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
      masm.jccb  (Assembler::zero, DONE_LABEL) ;      // 0 indicates recursive stack-lock

      masm.testptr(tmpReg, 0x02) ;                    // Inflated?
      masm.jccb  (Assembler::zero, Stacked) ;

      masm.bind  (Inflated) ;
      // It's inflated.
      // Despite our balanced locking property we still check that m->_owner == Self
      // as java routines or native JNI code called by this thread might
      // have released the lock.
      // Refer to the comments in synchronizer.cpp for how we might encode extra
      // state in _succ so we can avoid fetching EntryList|cxq.
      //
      // I'd like to add more cases in fast_lock() and fast_unlock() --
      // such as recursive enter and exit -- but we have to be wary of
      // I$ bloat, T$ effects and BP$ effects.
      //
      // If there's no contention try a 1-0 exit.  That is, exit without
      // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
      // we detect and recover from the race that the 1-0 exit admits.
      //
      // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
      // before it STs null into _owner, releasing the lock.  Updates
      // to data protected by the critical section must be visible before
      // we drop the lock (and thus before any other thread could acquire
      // the lock and observe the fields protected by the lock).
      // IA32's memory-model is SPO, so STs are ordered with respect to
      // each other and there's no need for an explicit barrier (fence).
      // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.

      masm.get_thread (boxReg) ;
      if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
        // prefetchw [ebx + Offset(_owner)-2]
        masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
      }

      // Note that we could employ various encoding schemes to reduce
      // the number of loads below (currently 4) to just 2 or 3.
      // Refer to the comments in synchronizer.cpp.
      // In practice the chain of fetches doesn't seem to impact performance, however.
      if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
         // Attempt to reduce branch density - AMD's branch predictor.
         // OR together owner^Self, recursions, EntryList and cxq: only when
         // all are zero may we take the 1-0 exit with a single branch.
         masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
         masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
         masm.jmpb  (DONE_LABEL) ;
      } else {
         masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
         masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
         masm.jccb  (Assembler::notZero, CheckSucc) ;
         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
         masm.jmpb  (DONE_LABEL) ;
      }

      // The following code fragment (EmitSync & 65536) improves the performance of
      // contended applications and contended synchronization microbenchmarks.
      // Unfortunately the emission of the code - even though not executed - causes regressions
      // in scimark and jetstream, evidently because of $ effects.  Replacing the code
      // with an equal number of never-executed NOPs results in the same regression.
      // We leave it off by default.

      if ((EmitSync & 65536) != 0) {
         Label LSuccess, LGoSlowPath ;

         masm.bind  (CheckSucc) ;

         // Optional pre-test ... it's safe to elide this
         if ((EmitSync & 16) == 0) {
            masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
            masm.jccb  (Assembler::zero, LGoSlowPath) ;
         }

         // We have a classic Dekker-style idiom:
         //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
         // There are a number of ways to implement the barrier:
         // (1) lock:andl &m->_owner, 0
         //     is fast, but masm doesn't currently support the "ANDL M,IMM32" form.
         //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
         //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
         // (2) If supported, an explicit MFENCE is appealing.
         //     In older IA32 processors MFENCE is slower than lock:add or xchg
         //     particularly if the write-buffer is full as might be the case if
         //     if stores closely precede the fence or fence-equivalent instruction.
         //     In more modern implementations MFENCE appears faster, however.
         // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
         //     The $lines underlying the top-of-stack should be in M-state.
         //     The locked add instruction is serializing, of course.
         // (4) Use xchg, which is serializing
         //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
         // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
         //     The integer condition codes will tell us if succ was 0.
         //     Since _succ and _owner should reside in the same $line and
         //     we just stored into _owner, it's likely that the $line
         //     remains in M-state for the lock:orl.
         //
         // We currently use (3), although it's likely that switching to (2)
         // is correct for the future.

         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
         if (os::is_MP()) {
            if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
              masm.mfence();
            } else {
              masm.lock () ; masm.addptr(Address(rsp, 0), 0) ;
            }
         }
         // Ratify _succ remains non-null
         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
         masm.jccb  (Assembler::notZero, LSuccess) ;

         masm.xorptr(boxReg, boxReg) ;                  // box is really EAX
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
         masm.jccb  (Assembler::notEqual, LSuccess) ;
         // Since we're low on registers we installed rsp as a placeholder in _owner.
         // Now install Self over rsp.  This is safe as we're transitioning from
         // non-null to non-null
         masm.get_thread (boxReg) ;
         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
         // Intentional fall-through into LGoSlowPath ...

         masm.bind  (LGoSlowPath) ;
         masm.orptr(boxReg, 1) ;                       // set ICC.ZF=0 to indicate failure
         masm.jmpb  (DONE_LABEL) ;

         masm.bind  (LSuccess) ;
         masm.xorptr(boxReg, boxReg) ;                 // set ICC.ZF=1 to indicate success
         masm.jmpb  (DONE_LABEL) ;
      }

      masm.bind (Stacked) ;
      // It's not inflated and it's not recursively stack-locked and it's not biased.
      // It must be stack-locked.
      // Try to reset the header to displaced header.
      // The "box" value on the stack is stable, so we can reload
      // and be assured we observe the same value as above.
      masm.movptr(tmpReg, Address(boxReg, 0)) ;
      if (os::is_MP()) { masm.lock(); }
      masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
      // Intentional fall-thru into DONE_LABEL


      // DONE_LABEL is a hot target - we'd really like to place it at the
      // start of cache line by padding with NOPs.
      // See the AMD and Intel software optimization manuals for the
      // most efficient "long" NOP encodings.
      // Unfortunately none of our alignment mechanisms suffice.
      if ((EmitSync & 65536) == 0) {
         masm.bind (CheckSucc) ;
      }
      masm.bind(DONE_LABEL);

      // Avoid branch to branch on AMD processors
      if (EmitSync & 32768) { masm.nop() ; }
    }
  %}


  // Pop the top of the CPU stack into EDX (single-byte POP EDX opcode).
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}

  // Jump to the shared rethrow stub; the PC-relative disp32 needs a
  // runtime-call relocation so it survives code movement.
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // mangling in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class D2I_encoding( regD src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);       // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);       // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);       // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);       // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    // 0x80000000 is the x87 "invalid" int result; only then take the slow call.
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Convert a double (in FPR0) to a long: same truncating-rounding trick as
  // D2I_encoding but with a 64-bit FISTP; the lone overflow result
  // 0x80000000_00000000 routes to the d2l_wrapper slow path.
  enc_class D2L_encoding( regD src ) %{
    emit_opcode(cbuf,0xD9);       // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words
    emit_opcode(cbuf,0x83);       // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);       // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);       // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted long; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x5A);       // POP EDX
    // Overflow check: EDX must be 0x80000000 AND EAX must be 0; otherwise
    // skip the slow call (branch offsets are hand-counted byte sizes).
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);     // Size of slow_call
    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Convert a float in an XMM register to a long: spill the float to the
  // stack, reload it onto the x87 stack, then reuse the truncating FISTP
  // sequence; overflow falls into the d2l_wrapper slow path.
  enc_class X2L_encoding( regX src ) %{
    // Allocate two words
    emit_opcode(cbuf,0x83);       // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF3 );   // MOVSS [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );      // FLD_S [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9);       // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());

    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);       // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);

    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);       // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted long; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX

    emit_opcode(cbuf,0x5A);       // POP EDX

    // Overflow check as in D2L_encoding: EDX:EAX == 0x80000000_00000000.
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000

    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x13+4);     // Size of slow_call

    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,

    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x13);       // Size of slow_call

    // Slow path: re-materialize the float on the x87 stack, then call the stub.
    // Allocate a word
    emit_opcode(cbuf,0x83);       // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);

    emit_opcode  (cbuf, 0xF3 );   // MOVSS [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );      // FLD_S [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);       // ADD ESP,4
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x04);

    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Convert a double in an XMM register to a long: spill the double to the
  // stack, reload it onto the x87 stack, then use the truncating 64-bit
  // FISTP sequence; the overflow result 0x80000000_00000000 routes to the
  // d2l_wrapper slow path.
  enc_class XD2L_encoding( regXD src ) %{
    // Allocate two words
    emit_opcode(cbuf,0x83);       // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 );   // MOVSD [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9);       // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());

    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);       // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);

    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);       // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted long; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX

    emit_opcode(cbuf,0x5A);       // POP EDX

    // Overflow check: EDX must be 0x80000000 AND EAX must be 0.
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000

    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x13+4);     // Size of slow_call

    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,

    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x13);       // Size of slow_call

    // Push src onto stack slow-path
    // Allocate two words
    emit_opcode(cbuf,0x83);       // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 );   // MOVSD [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);       // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x08);

    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Move an x87 double (FPR src) into an XMM register through a 4-byte
  // spill slot; the value is stored single-precision on the way through.
  enc_class D2X_encoding( regX dst, regD src ) %{
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    int pop = 0x02;                    // FST_S (no pop) when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );       // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                      // FSTP_S pops the copy we just pushed
    }
    store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S  [ESP]

    emit_opcode  (cbuf, 0xF3 );        // MOVSS dst(xmm), [ESP]
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x10 );
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);            // ADD ESP,4
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x04);
    // Carry on here...
  %}

  // Finish an XMM float/double-to-int conversion ($primary selects the
  // double variant).  The CVTT opcode bytes are emitted before this
  // encoding runs; this supplies the mod/rm byte plus the 0x80000000
  // overflow slow path that calls d2i_wrapper.
  enc_class FX2I_encoding( regX src, eRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);

    // Compare the result to see if we need to go to the slow path
    emit_opcode(cbuf,0x81);       // CMP dst,imm
    emit_rm    (cbuf,0x3,0x7,$dst$$reg);
    emit_d32   (cbuf,0x80000000); //         0x80000000

    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x13);       // Size of slow_call
    // Store xmm to a temp memory
    // location and push it onto stack.

    emit_opcode(cbuf,0x83);       // SUB ESP,4 (or 8 for double)
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf, $primary ? 0x8 : 0x4);

    emit_opcode  (cbuf, $primary ? 0xF2 : 0xF3 );   // MOVSS [ESP], xmm
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf, $primary ? 0xDD : 0xD9 );     // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);       // ADD ESP,4 (or 8 for double)
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf, $primary ? 0x8 : 0x4);

    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );

    // Carry on here...
  %}

  // Move a float from an XMM register onto the x87 stack via a spill slot.
  enc_class X2D_encoding( regD dst, regX src ) %{
    // Allocate a word
    emit_opcode(cbuf,0x83);       // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);

    emit_opcode  (cbuf, 0xF3 );   // MOVSS [ESP], xmm
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );      // FLD_S [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);       // ADD ESP,4
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x04);

    // Carry on here...
  %}

  // Float absolute value: clear the sign bit with ANDPS against the
  // float sign-mask constant pool.
  enc_class AbsXF_encoding(regX dst) %{
    address signmask_address=(address)float_signmask_pool;
    // andpd:\tANDPS  $dst,[signconst]
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x54);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_d32(cbuf, (int)signmask_address);
  %}

  // Double absolute value: clear the sign bit with ANDPD against the
  // double sign-mask constant pool.
  enc_class AbsXD_encoding(regXD dst) %{
    address signmask_address=(address)double_signmask_pool;
    // andpd:\tANDPD  $dst,[signconst]
    emit_opcode(cbuf, 0x66);
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x54);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_d32(cbuf, (int)signmask_address);
  %}

  // Float negate: flip the sign bit with XORPS against the float
  // sign-flip constant pool.
  enc_class NegXF_encoding(regX dst) %{
    address signmask_address=(address)float_signflip_pool;
    // andpd:\tXORPS  $dst,[signconst]
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x57);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_d32(cbuf, (int)signmask_address);
  %}

  // Double negate: flip the sign bit with XORPD against the double
  // sign-flip constant pool.
  enc_class NegXD_encoding(regXD dst) %{
    address signmask_address=(address)double_signflip_pool;
    // andpd:\tXORPD  $dst,[signconst]
    emit_opcode(cbuf, 0x66);
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x57);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_d32(cbuf, (int)signmask_address);
  %}

  // FMUL ST,ST(src1): multiply top-of-stack by an x87 stack register.
  enc_class FMul_ST_reg( eRegF src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  // FADD ST,ST(src2): add an x87 stack register into top-of-stack.
  enc_class FAdd_ST_reg( eRegF src2 ) %{
    // FADDP  ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  // FADDP ST(src2),ST: add top-of-stack into a stack register and pop.
  enc_class FAddP_reg_ST( eRegF src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  // Fused subtract-then-divide on the x87 stack: FSUB ST,ST(src1) followed
  // by FDIV ST,ST(src2).
  enc_class subF_divF_encode( eRegF src1, eRegF src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  // Fused add-then-multiply: FADD ST,ST(src1) then FMUL ST,ST(src2).
  enc_class MulFAddF (eRegF src1, eRegF src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  // Reverse form of the above: FADD ST,ST(src1) then FMULP ST(src2),ST,
  // which stores into src2 and pops the x87 stack.
  enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  // FILD qword [mem] then a 64-bit FISTP into the destination stack slot:
  // the x87 moves 64 bits at a time, giving an atomic long copy.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);                       // FILD qword [mem]
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );  // FISTP qword [dst]
  %}

  // Atomic volatile-long load via an XMM temp: one 64-bit XMM load from
  // memory followed by one 64-bit XMM store to the destination stack slot.
  enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
    { // Atomic long load
      // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
      int base     = $mem$$base;
      int index    = $mem$$index;
      int scale    = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
    { // MOVSD $dst,$tmp ! atomic long store
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x11);
      int base     = $dst$$base;
      int index    = $dst$$index;
      int scale    = $dst$$scale;
      int displace = $dst$$disp;
      bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
  %}

  // Atomic volatile-long load into a GPR pair via an XMM temp:
  // MOVD the low half out, PSRLQ the temp by 32, MOVD the high half out.
  enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
    { // Atomic long load
      // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
      int base     = $mem$$base;
      int index    = $mem$$index;
      int scale    = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
    { // MOVD $dst.lo,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
    }
    { // PSRLQ $tmp,32
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x73);
      emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
      emit_d8(cbuf, 0x20);
    }
    { // MOVD $dst.hi,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
    }
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );  // FILD qword [src]
    cbuf.set_insts_mark();          // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);                       // FISTP qword [mem]
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
  %}

  // Atomic volatile-long store from a stack slot via an XMM temp:
  // one 64-bit XMM load from the slot, one 64-bit MOVSD to memory.
  enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
    { // Atomic long load
      // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
      int base     = $src$$base;
      int index    = $src$$index;
      int scale    = $src$$scale;
      int displace = $src$$disp;
      bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
    cbuf.set_insts_mark();          // Mark start of MOVSD in case $mem has an oop
    { // MOVSD $mem,$tmp ! atomic long store
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x11);
      int base     = $mem$$base;
      int index    = $mem$$index;
      int scale    = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
  %}

  // Atomic volatile-long store from a GPR pair: pack lo/hi halves into an
  // XMM register (MOVD + MOVD + PUNPCKLDQ), then one 64-bit MOVSD to memory.
  enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
    { // MOVD $tmp,$src.lo
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    }
    { // MOVD $tmp2,$src.hi
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
    }
    { // PUNPCKLDQ $tmp,$tmp2
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x62);
      emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
    }
    cbuf.set_insts_mark();          // Mark start of MOVSD in case $mem has an oop
    { // MOVSD $mem,$tmp ! atomic long store
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x11);
      int base     = $mem$$base;
      int index    = $mem$$index;
      int scale    = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
  %}

  // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable.  Unfortunately, it kills the condition code
  // in the process.
  // We currently use TESTL [spp],EDI
  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
  enc_class Safepoint_Poll() %{
    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
    emit_opcode(cbuf,0x85);          // TESTL EDI,[disp32]
    emit_rm (cbuf, 0x0, 0x7, 0x5);
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  %}
%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
// Owned by        |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by      +--------+
//    CALLEE       | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to(1+VerifyStackAtCalls+
              Compile::current()->fixed_slots(),
              (StackAlignmentInBytes/wordSize)));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0a_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0a_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0a_num);
    // NOTE(review): Java return values use XMM0 for floats already at
    // UseSSE>=1, while c_return_value above requires UseSSE>=2; the asymmetry
    // appears deliberate -- confirm against callers before changing.
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0a_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_pc_relative(0);  // Required PC Relative flag
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.
                                // If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer immediate that fits in a signed byte
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer immediate that fits in a signed 16-bit word
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one (all bits set)
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value representable as a sign-extended 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immD1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immD() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 form)
operand immXD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immXD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immF0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immF1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE form)
operand immXF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immXF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand eRegI() %{
  constraint(ALLOC_IN_RC(e_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(eRegI reg) %{
  constraint(ALLOC_IN_RC(x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(eRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(eRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(eRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(eRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(eRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register excluding EAX ("not-ax")
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding EAX and EDX ("not-ax-dx")
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding ECX ("not-cx")
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(esi_reg));
   match(reg);
   match(eRegI);

   format %{ "ESI" %}
   interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(e_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(e_reg_no_rbp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX ("not-ax")
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX and EBX ("not-ax-bx")
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

operand eBPRegP() %{
  constraint(ALLOC_IN_RC(ebp_reg));
  match(RegP);
  format %{ "EBP" %}
  interface(REG_INTER);
%}

// Long value held in a register pair
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

//
// Float register operands (x87 FPU stack; only used when SSE2 is unavailable)
operand regD() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regD reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regD reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regD reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regXD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(xdb_reg));
  match(RegD);
  match(regXD6);
  match(regXD7);
  format %{ %}
  interface(REG_INTER);
%}

// XMM6 double register operands
operand regXD6(regXD reg) %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(xdb_reg6));
  match(reg);
  format %{ "XMM6" %}
  interface(REG_INTER);
%}

// XMM7 double register operands
operand regXD7(regXD reg) %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(xdb_reg7));
  match(reg);
  format %{ "XMM7" %}
  interface(REG_INTER);
%}

// Float register operands
operand regF() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regF reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM register operands
operand regX() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(xmm_reg));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}


//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(e_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32X(eRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(eRegP reg, eRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.
// If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(e_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed).  The hex values are the x86 condition-code
// nibble used in Jcc/SETcc/CMOVcc encodings (0x4 = E, 0x5 = NE, ...).
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case.
// Restricted by the predicate to the strict/loose inequality tests; uses the
// same unsigned condition codes as cmpOpU.
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}


// Floating comparisons (eq/ne only) that can be fixed up with extra
// conditional jumps to handle the unordered case.
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}

// Comparison Code for FP conditional move.
// NOTE(review): the values appear to be FCMOVcc opcode material — low byte
// is the ModRM base (C0/C8/D0) and the 0x100 bit presumably selects the
// negated-condition escape byte (DB vs DA); verify against the encode
// classes that consume this operand.
operand cmpOp_fcmov() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
  %}
%}

// Comparison Code used in long compares.  Same condition-code nibbles as
// cmpOp but with less/greater (and their -or-equal forms) swapped, for use
// when the operands of the compare have been commuted.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable-sized (x86)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(eRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(eRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation
// NOTE(review): despite the reg-reg name, src is declared as a memory
// operand here — confirm whether that is intentional for the _fat form.
pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, eRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation producing flags
pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation producing flags
pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation producing flags
pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg (expanded CMoveI idiom, 4 instructions)
pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg operation
pipe_class fpu_reg(regD dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regD dst, regD src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg-reg operation
pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg-reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-mem-reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regD dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regD src) %{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regD dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regD dst, regD src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR     : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr     : S1(read);
    BR     : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0       : S0(2);
    MEM      : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters.  The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
// Reverse the byte order of an int in place (BSWAP dst).
instruct bytes_reverse_int(eRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse the byte order of a long: byte-swap each 32-bit half, then
// exchange the halves (encoded by the bswap_long_bytes encode class).
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG  $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Reverse the two low bytes, zero-extended: BSWAP puts them in the top
// half, then a logical shift right by 16 brings them down with zero fill.
instruct bytes_reverse_unsigned_short(eRegI dst) %{
  match(Set dst (ReverseBytesUS dst));

  format %{ "BSWAP $dst\n\t"
            "SHR   $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Reverse the two low bytes, sign-extended: as above but with an
// arithmetic shift so the result is sign-extended to 32 bits.
instruct bytes_reverse_short(eRegI dst) %{
  match(Set dst (ReverseBytesS dst));

  format %{ "BSWAP $dst\n\t"
            "SAR   $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

// Count leading zeros with the LZCNT instruction when available.
instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback using BSR.  BSR leaves dst undefined when src is zero (ZF set),
// so substitute -1 in that case; NEG + ADD 31 then converts the index of
// the highest set bit into the leading-zero count.
instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ    skip\n\t"
            "MOV    $dst, -1\n"
      "skip:\n\t"
            "NEG    $dst\n\t"
            "ADD    $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Long LZCNT: count on the high word first; LZCNT sets CF when its source
// is zero, so CF clear means the high word had a set bit and we are done.
// Otherwise count the low word and add 32.
instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC   done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD   $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long BSR fallback: scan the high word, fall back to the low word when it
// is zero, substitute -1 when both are zero; NEG + ADD 63 converts the
// 0..63 bit index into the leading-zero count.
instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ     msw_is_zero\n\t"
            "ADD    $dst, 32\n\t"
            "JMP    not_zero\n"
      "msw_is_zero:\n\t"
            "BSR    $dst, $src.lo\n\t"
            "JNZ    not_zero\n\t"
            "MOV    $dst, -1\n"
      "not_zero:\n\t"
            "NEG    $dst\n\t"
            "ADD    $dst, 63\n" %}
 ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros with BSF.  BSF leaves dst undefined when src is
// zero (ZF set), so substitute 32 in that case.
instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ    done\n\t"
            "MOV    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long trailing-zero count: scan the low word first; if it is zero, scan
// the high word (substituting 32 when that is also zero, giving 64 total)
// and add 32 for the skipped low word.
instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ    done\n\t"
            "BSF    $dst, $src.hi\n\t"
            "JNZ    msw_not_zero\n\t"
            "MOV    $dst, 32\n"
      "msw_not_zero:\n\t"
            "ADD    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(eRegI dst, eRegI src) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(eRegI dst, memory mem) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Sum the population counts of the two 32-bit halves.
instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// The two halves of the long live at $mem and $mem+4.
instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register.
// After the sign-extending byte load, bits 7..31 of lo all equal the sign
// bit, so an arithmetic shift of the copied value by just 7 fills hi with
// the sign.
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register.
// The value is non-negative, so the high word is simply zeroed.
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(eRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX  $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed).
// The (x << 24) >> 24 pattern is byte sign-extension, so a single
// sign-extending byte load does the whole job.
instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register.
// After the sign-extending short load, bits 15..31 of lo all equal the
// sign bit, so an arithmetic shift by 15 fills hi with the sign.
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(eRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register.
// Masking with 0xFF reduces the load to a single zero-extended byte load.
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register
instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(eRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register: copy to hi and arithmetic-shift by 31
// to replicate the sign bit.
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register.
// The mask reduces the load to a single zero-extended byte load.
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register.
// The mask reduces the load to a single zero-extended word load.
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 32-bit mask into Long Register.
// General case: load, zero the high word, then apply the mask.
instruct loadI2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV    $dst.lo,$mem\t# int & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (LoadUI2L mem));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
            "MOV    $dst.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// Volatile long load directly into a long register pair via an XMM temp (SSE2).
instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(eRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);  /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadXD(regXD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

instruct loadXD_partial(regXD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadX(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Load Float
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);  /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Aligned Packed Byte to XMM register
instruct loadA8B(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load8B mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed8B" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Short to XMM register
instruct loadA4S(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4S mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed4S" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Char to XMM register
instruct loadA4C(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4C mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed4C" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Integer to XMM register
instruct load2IU(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2I mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed2I" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Single to XMM
instruct loadA2F(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2F mem));
  ins_cost(145);
  format %{ "MOVQ $dst,$mem\t! packed2F" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Effective Address
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(eRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);  /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF1().
instruct loadConF1(regF dst, immF1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immXF().
7194 instruct loadConX(regX dst, immXF con) %{ 7195 match(Set dst con); 7196 ins_cost(125); 7197 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 7198 ins_encode %{ 7199 __ movflt($dst$$XMMRegister, $constantaddress($con)); 7200 %} 7201 ins_pipe(pipe_slow); 7202 %} 7203 7204 // The instruction usage is guarded by predicate in operand immXF0(). 7205 instruct loadConX0(regX dst, immXF0 src) %{ 7206 match(Set dst src); 7207 ins_cost(100); 7208 format %{ "XORPS $dst,$dst\t# float 0.0" %} 7209 ins_encode %{ 7210 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 7211 %} 7212 ins_pipe(pipe_slow); 7213 %} 7214 7215 // The instruction usage is guarded by predicate in operand immD(). 7216 instruct loadConD(regD dst, immD con) %{ 7217 match(Set dst con); 7218 ins_cost(125); 7219 7220 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 7221 "FSTP $dst" %} 7222 ins_encode %{ 7223 __ fld_d($constantaddress($con)); 7224 __ fstp_d($dst$$reg); 7225 %} 7226 ins_pipe(fpu_reg_con); 7227 %} 7228 7229 // The instruction usage is guarded by predicate in operand immD0(). 7230 instruct loadConD0(regD dst, immD0 con) %{ 7231 match(Set dst con); 7232 ins_cost(125); 7233 7234 format %{ "FLDZ ST\n\t" 7235 "FSTP $dst" %} 7236 ins_encode %{ 7237 __ fldz(); 7238 __ fstp_d($dst$$reg); 7239 %} 7240 ins_pipe(fpu_reg_con); 7241 %} 7242 7243 // The instruction usage is guarded by predicate in operand immD1(). 7244 instruct loadConD1(regD dst, immD1 con) %{ 7245 match(Set dst con); 7246 ins_cost(125); 7247 7248 format %{ "FLD1 ST\n\t" 7249 "FSTP $dst" %} 7250 ins_encode %{ 7251 __ fld1(); 7252 __ fstp_d($dst$$reg); 7253 %} 7254 ins_pipe(fpu_reg_con); 7255 %} 7256 7257 // The instruction usage is guarded by predicate in operand immXD(). 
instruct loadConXD(regXD dst, immXD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immXD0().
instruct loadConXD0(regXD dst, immXD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(eRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regF dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);  /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regD dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);  /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions.
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchr0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch());
  match(PrefetchRead mem);
  ins_cost(0);
  size(0);
  format %{ "PREFETCHR (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchr( memory mem ) %{
  predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || ReadPrefetchInstr==3);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
  opcode(0x0F, 0x0d);  /* Opcode 0F 0d /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrNTA( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==0);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
  opcode(0x0F, 0x18);  /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrT0( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==1);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
  opcode(0x0F, 0x18);  /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrT2( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==2);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
  opcode(0x0F, 0x18);  /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchw0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch());
  match(PrefetchWrite mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchw( memory mem ) %{
  predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || AllocatePrefetchInstr==3);
  match( PrefetchWrite mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
  opcode(0x0F, 0x0D);  /* Opcode 0F 0D /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
  opcode(0x0F, 0x18);  /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for write" %}
  opcode(0x0F, 0x18);  /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for write" %}
  opcode(0x0F, 0x18);  /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
instruct storeC(memory mem, eRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, eRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp));
  ins_pipe( pipe_slow );
%}

instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);  /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);  /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);  /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);  /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Aligned Packed Byte XMM register to memory
instruct storeA8B(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store8B mem src));
  ins_cost(145);
  format %{ "MOVQ $mem,$src\t! packed8B" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Char/Short XMM register to memory
instruct storeA4C(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store4C mem src));
  ins_cost(145);
  format %{ "MOVQ $mem,$src\t! packed4C" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Integer XMM register to memory
instruct storeA2I(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store2I mem src));
  ins_cost(145);
  format %{ "MOVQ $mem,$src\t! packed2I" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);  /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double
instruct storeD( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);  /* DD /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeD_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);  /* DD /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeXD(memory mem, regXD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeX(memory mem, regX src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Single Float XMM register to memory
instruct storeA2F(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store2F mem src));
  ins_cost(145);
  format %{ "MOVQ $mem,$src\t! packed2F" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Float
instruct storeF( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);  /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeF_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);  /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeF_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);  /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);  /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immXF().
instruct storeX_imm( memory mem, immXF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);  /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, eRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquire);
  predicate(Matcher::prior_fast_lock(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarRelease);
  predicate(Matcher::post_fast_unlock(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(eRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7916 ins_cost(200); 7917 expand %{ 7918 cmovI_regU(cop, cr, dst, src); 7919 %} 7920 %} 7921 7922 // Conditional move 7923 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{ 7924 predicate(VM_Version::supports_cmov() ); 7925 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 7926 ins_cost(250); 7927 format %{ "CMOV$cop $dst,$src" %} 7928 opcode(0x0F,0x40); 7929 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 7930 ins_pipe( pipe_cmov_mem ); 7931 %} 7932 7933 // Conditional move 7934 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{ 7935 predicate(VM_Version::supports_cmov() ); 7936 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 7937 ins_cost(250); 7938 format %{ "CMOV$cop $dst,$src" %} 7939 opcode(0x0F,0x40); 7940 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 7941 ins_pipe( pipe_cmov_mem ); 7942 %} 7943 7944 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{ 7945 predicate(VM_Version::supports_cmov() ); 7946 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 7947 ins_cost(250); 7948 expand %{ 7949 cmovI_memU(cop, cr, dst, src); 7950 %} 7951 %} 7952 7953 // Conditional move 7954 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 7955 predicate(VM_Version::supports_cmov() ); 7956 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 7957 ins_cost(200); 7958 format %{ "CMOV$cop $dst,$src\t# ptr" %} 7959 opcode(0x0F,0x40); 7960 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 7961 ins_pipe( pipe_cmov_reg ); 7962 %} 7963 7964 // Conditional move (non-P6 version) 7965 // Note: a CMoveP is generated for stubs and native wrappers 7966 // regardless of whether we are on a P6, so we 7967 // emulate a cmov here 7968 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 7969 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 7970 ins_cost(300); 7971 format %{ "Jn$cop skip\n\t" 7972 "MOV $dst,$src\t# 
pointer\n" 7973 "skip:" %} 7974 opcode(0x8b); 7975 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); 7976 ins_pipe( pipe_cmov_reg ); 7977 %} 7978 7979 // Conditional move 7980 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ 7981 predicate(VM_Version::supports_cmov() ); 7982 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 7983 ins_cost(200); 7984 format %{ "CMOV$cop $dst,$src\t# ptr" %} 7985 opcode(0x0F,0x40); 7986 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 7987 ins_pipe( pipe_cmov_reg ); 7988 %} 7989 7990 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ 7991 predicate(VM_Version::supports_cmov() ); 7992 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 7993 ins_cost(200); 7994 expand %{ 7995 cmovP_regU(cop, cr, dst, src); 7996 %} 7997 %} 7998 7999 // DISABLED: Requires the ADLC to emit a bottom_type call that 8000 // correctly meets the two pointer arguments; one is an incoming 8001 // register but the other is a memory operand. ALSO appears to 8002 // be buggy with implicit null checks. 
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move of a double on the x87 stack (UseSSE<=1).
// Uses FCMOVcc (opcode 0xDA family), which only understands the
// *unsigned* condition codes -- hence the cmpOp_fcmov/eFlagsRegU operands.
// dst is pinned to ST(0) (regDPR1) because FCMOV always targets ST(0).
instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_d(cop,src) );
  ins_pipe( pipe_cmovD_reg );
%}

// Conditional move of a float on the x87 stack (UseSSE==0).
// Same FCMOVcc encoding as the double case: x87 registers hold both
// widths in 80-bit form, so enc_cmov_d serves here too.
instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_d(cop,src) );
  ins_pipe( pipe_cmovD_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed-compare conditional move of an x87 double (UseSSE<=1).
// FCMOVcc only supports unsigned condition codes, so the signed case is
// emulated: branch around an x87 push/store pair using the inverted
// condition (enc_cmov_branch with a 4-byte skip distance).
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovD_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Same branch-around emulation as fcmovD_regS, for the x87 float case.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovD_reg );
%}

// No CMOVE with SSE/SSE2: branch around a MOVSS instead.
instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2: branch around a MOVSD instead.
instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // Fix: this instruct moves a double (regXD/CMoveD/MOVSD); the format
  // comment previously said "# float", copy-pasted from fcmovX_regS.
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// Carry-flag variant delegates to the plain unsigned version.
instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // Fix: "# double", not "# float" -- see fcmovXD_regS above.
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// Carry-flag variant delegates to the plain unsigned version.
instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regU(cop, cr, dst, src);
  %}
%}

// Conditional move of a long: one CMOV per 32-bit half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format
%{ "CMOV$cop $dst.lo,$src.lo\n\t"
   "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  // Two CMOVs, one for each 32-bit half of the long.
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-compare variant of the long conditional move.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Carry-flag variant delegates to the plain unsigned version.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------
// Integer Addition Instructions
// Register-register add; ADD sets the arithmetic flags, hence KILL cr.
instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add immediate to register; Con8or32 picks the sign-extended imm8
// form (0x83) or the imm32 form (0x81) as appropriate.
instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of constant 1 strength-reduced to the one-byte INC (0x40+reg),
// gated on UseIncDec since INC leaves the carry flag untouched.
instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  // Opc_plus folds the register number into the opcode byte (0x40+reg).
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: dst = src0 + src1 without clobbering flags.
instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer flavor of the LEA add above.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add of constant -1 strength-reduced to the one-byte DEC (0x48+reg);
// gated on UseIncDec like incI_eReg.
instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Pointer += int, same ADD encoding as the integer case.
instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer += immediate.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Register += memory operand (folds the load into the ADD).
instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Memory += register: read-modify-write ADD straight to memory.
instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
// Read-modify-write: mem = mem + imm in a single instruction.
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// mem += 1 strength-reduced to INC mem (FF /0); the immI1 operand is
// matched but not emitted.
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// mem += -1 strength-reduced to DEC mem (FF /1).
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// CheckCastPP is a compile-time type assertion: no code is emitted.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastPP likewise emits nothing; it only changes the ideal type.
instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastII: integer type-narrowing marker, also zero code.
instruct castII( eRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}


// Load-locked - same as a regular pointer load when used with compare-swap
// On x86 there is no load-linked instruction; LoadPLocked is just a
// plain MOV, with atomicity supplied by the CMPXCHG it pairs with.
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// LoadLong-locked - same as a volatile long load when used with compare-swap
// Pre-SSE2 path: a 64-bit FILD/FISTP pair gives an atomic 8-byte load
// through the x87 unit, landing in a stack slot.
instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadLLocked mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 path: MOVSD is an atomic 8-byte load; bounce through an XMM temp
// into the destination stack slot.
instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (LoadLLocked mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// SSE2 path straight into a register pair: MOVD the low word, shift the
// XMM temp right 32 bits, MOVD the high word.
instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (LoadLLocked mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  // LOCK CMPXCHG (0F B1): compares EAX (oldval) with memory, stores
  // newval on match; ZF reports success to the matched cr result.
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  // CMPXCHG overwrites EAX with the memory value on failure.
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
8450 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 8451 if( os::is_MP() ) 8452 __ lock(); 8453 __ cmpxchg8($mem$$Address); 8454 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 8455 %} 8456 ins_pipe( pipe_cmpxchg ); 8457 %} 8458 8459 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 8460 8461 instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 8462 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 8463 effect(KILL cr, KILL oldval); 8464 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 8465 "MOV $res,0\n\t" 8466 "JNE,s fail\n\t" 8467 "MOV $res,1\n" 8468 "fail:" %} 8469 ins_encode( enc_cmpxchg8(mem_ptr), 8470 enc_flags_ne_to_boolean(res) ); 8471 ins_pipe( pipe_cmpxchg ); 8472 %} 8473 8474 instruct compareAndSwapP( eRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 8475 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 8476 effect(KILL cr, KILL oldval); 8477 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 8478 "MOV $res,0\n\t" 8479 "JNE,s fail\n\t" 8480 "MOV $res,1\n" 8481 "fail:" %} 8482 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 8483 ins_pipe( pipe_cmpxchg ); 8484 %} 8485 8486 instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 8487 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 8488 effect(KILL cr, KILL oldval); 8489 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 8490 "MOV $res,0\n\t" 8491 "JNE,s fail\n\t" 8492 "MOV $res,1\n" 8493 "fail:" %} 8494 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 8495 ins_pipe( pipe_cmpxchg ); 8496 %} 8497 8498 //----------Subtraction Instructions------------------------------------------- 8499 // Integer Subtraction 
Instructions 8500 instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ 8501 match(Set dst (SubI dst src)); 8502 effect(KILL cr); 8503 8504 size(2); 8505 format %{ "SUB $dst,$src" %} 8506 opcode(0x2B); 8507 ins_encode( OpcP, RegReg( dst, src) ); 8508 ins_pipe( ialu_reg_reg ); 8509 %} 8510 8511 instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ 8512 match(Set dst (SubI dst src)); 8513 effect(KILL cr); 8514 8515 format %{ "SUB $dst,$src" %} 8516 opcode(0x81,0x05); /* Opcode 81 /5 */ 8517 // ins_encode( RegImm( dst, src) ); 8518 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8519 ins_pipe( ialu_reg ); 8520 %} 8521 8522 instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{ 8523 match(Set dst (SubI dst (LoadI src))); 8524 effect(KILL cr); 8525 8526 ins_cost(125); 8527 format %{ "SUB $dst,$src" %} 8528 opcode(0x2B); 8529 ins_encode( OpcP, RegMem( dst, src) ); 8530 ins_pipe( ialu_reg_mem ); 8531 %} 8532 8533 instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{ 8534 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 8535 effect(KILL cr); 8536 8537 ins_cost(150); 8538 format %{ "SUB $dst,$src" %} 8539 opcode(0x29); /* Opcode 29 /r */ 8540 ins_encode( OpcP, RegMem( src, dst ) ); 8541 ins_pipe( ialu_mem_reg ); 8542 %} 8543 8544 // Subtract from a pointer 8545 instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{ 8546 match(Set dst (AddP dst (SubI zero src))); 8547 effect(KILL cr); 8548 8549 size(2); 8550 format %{ "SUB $dst,$src" %} 8551 opcode(0x2B); 8552 ins_encode( OpcP, RegReg( dst, src) ); 8553 ins_pipe( ialu_reg_reg ); 8554 %} 8555 8556 instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{ 8557 match(Set dst (SubI zero dst)); 8558 effect(KILL cr); 8559 8560 size(2); 8561 format %{ "NEG $dst" %} 8562 opcode(0xF7,0x03); // Opcode F7 /3 8563 ins_encode( OpcP, RegOpc( dst ) ); 8564 ins_pipe( ialu_reg ); 8565 %} 8566 8567 8568 //----------Multiplication/Division Instructions------------------------------- 8569 // 
Integer Multiplication Instructions 8570 // Multiply Register 8571 instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ 8572 match(Set dst (MulI dst src)); 8573 effect(KILL cr); 8574 8575 size(3); 8576 ins_cost(300); 8577 format %{ "IMUL $dst,$src" %} 8578 opcode(0xAF, 0x0F); 8579 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 8580 ins_pipe( ialu_reg_reg_alu0 ); 8581 %} 8582 8583 // Multiply 32-bit Immediate 8584 instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{ 8585 match(Set dst (MulI src imm)); 8586 effect(KILL cr); 8587 8588 ins_cost(300); 8589 format %{ "IMUL $dst,$src,$imm" %} 8590 opcode(0x69); /* 69 /r id */ 8591 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 8592 ins_pipe( ialu_reg_reg_alu0 ); 8593 %} 8594 8595 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 8596 match(Set dst src); 8597 effect(KILL cr); 8598 8599 // Note that this is artificially increased to make it more expensive than loadConL 8600 ins_cost(250); 8601 format %{ "MOV EAX,$src\t// low word only" %} 8602 opcode(0xB8); 8603 ins_encode( LdImmL_Lo(dst, src) ); 8604 ins_pipe( ialu_reg_fat ); 8605 %} 8606 8607 // Multiply by 32-bit Immediate, taking the shifted high order results 8608 // (special case for shift by 32) 8609 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 8610 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 8611 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 8612 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 8613 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 8614 effect(USE src1, KILL cr); 8615 8616 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 8617 ins_cost(0*100 + 1*400 - 150); 8618 format %{ "IMUL EDX:EAX,$src1" %} 8619 ins_encode( multiply_con_and_shift_high( dst, src1, 
src2, cnt, cr ) ); 8620 ins_pipe( pipe_slow ); 8621 %} 8622 8623 // Multiply by 32-bit Immediate, taking the shifted high order results 8624 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 8625 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 8626 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 8627 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 8628 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 8629 effect(USE src1, KILL cr); 8630 8631 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 8632 ins_cost(1*100 + 1*400 - 150); 8633 format %{ "IMUL EDX:EAX,$src1\n\t" 8634 "SAR EDX,$cnt-32" %} 8635 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 8636 ins_pipe( pipe_slow ); 8637 %} 8638 8639 // Multiply Memory 32-bit Immediate 8640 instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{ 8641 match(Set dst (MulI (LoadI src) imm)); 8642 effect(KILL cr); 8643 8644 ins_cost(300); 8645 format %{ "IMUL $dst,$src,$imm" %} 8646 opcode(0x69); /* 69 /r id */ 8647 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 8648 ins_pipe( ialu_reg_mem_alu0 ); 8649 %} 8650 8651 // Multiply Memory 8652 instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{ 8653 match(Set dst (MulI dst (LoadI src))); 8654 effect(KILL cr); 8655 8656 ins_cost(350); 8657 format %{ "IMUL $dst,$src" %} 8658 opcode(0xAF, 0x0F); 8659 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 8660 ins_pipe( ialu_reg_mem_alu0 ); 8661 %} 8662 8663 // Multiply Register Int to Long 8664 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 8665 // Basic Idea: long = (long)int * (long)int 8666 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 8667 effect(DEF dst, USE src, USE src1, KILL flags); 8668 8669 ins_cost(300); 8670 
format %{ "IMUL $dst,$src1" %} 8671 8672 ins_encode( long_int_multiply( dst, src1 ) ); 8673 ins_pipe( ialu_reg_reg_alu0 ); 8674 %} 8675 8676 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 8677 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 8678 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 8679 effect(KILL flags); 8680 8681 ins_cost(300); 8682 format %{ "MUL $dst,$src1" %} 8683 8684 ins_encode( long_uint_multiply(dst, src1) ); 8685 ins_pipe( ialu_reg_reg_alu0 ); 8686 %} 8687 8688 // Multiply Register Long 8689 instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ 8690 match(Set dst (MulL dst src)); 8691 effect(KILL cr, TEMP tmp); 8692 ins_cost(4*100+3*400); 8693 // Basic idea: lo(result) = lo(x_lo * y_lo) 8694 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 8695 format %{ "MOV $tmp,$src.lo\n\t" 8696 "IMUL $tmp,EDX\n\t" 8697 "MOV EDX,$src.hi\n\t" 8698 "IMUL EDX,EAX\n\t" 8699 "ADD $tmp,EDX\n\t" 8700 "MUL EDX:EAX,$src.lo\n\t" 8701 "ADD EDX,$tmp" %} 8702 ins_encode( long_multiply( dst, src, tmp ) ); 8703 ins_pipe( pipe_slow ); 8704 %} 8705 8706 // Multiply Register Long where the left operand's high 32 bits are zero 8707 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ 8708 predicate(is_operand_hi32_zero(n->in(1))); 8709 match(Set dst (MulL dst src)); 8710 effect(KILL cr, TEMP tmp); 8711 ins_cost(2*100+2*400); 8712 // Basic idea: lo(result) = lo(x_lo * y_lo) 8713 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 8714 format %{ "MOV $tmp,$src.hi\n\t" 8715 "IMUL $tmp,EAX\n\t" 8716 "MUL EDX:EAX,$src.lo\n\t" 8717 "ADD EDX,$tmp" %} 8718 ins_encode %{ 8719 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 8720 __ imull($tmp$$Register, rax); 8721 __ mull($src$$Register); 8722 __ addl(rdx, $tmp$$Register); 8723 %} 8724 ins_pipe( pipe_slow ); 8725 %} 8726 8727 // 
Multiply Register Long where the right operand's high 32 bits are zero
// (EDX:EAX * src) when y_hi == 0: the x_lo*y_hi cross-product term vanishes.
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
// Both operands fit in 32 bits: a single unsigned MUL produces the full 64-bit result.
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// Special-cases min_jint / -1 (which would raise #DE overflow on IDIV):
// that pair is detected up front and the result forced to min_jint.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// General 64-bit division is punted to the runtime (SharedRuntime::ldiv).
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
// IDIV naturally leaves quotient in EAX and remainder in EDX, so one
// instruction serves both projections of the DivModI node.
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
// Remainder lands in EDX after IDIV; EAX (the quotient) is clobbered.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// Like divL_eReg, delegated to the runtime (SharedRuntime::lrem).
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
// Long division by a 32-bit constant, done inline with two unsigned DIVs;
// a negative dividend or divisor is handled by negating around the division.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    // The matcher guarantees a divisor where no #DE overflow is possible.
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrough for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
// Companion to divL_eReg_imm32: same two-DIV scheme, keeping the remainder
// (in EDX after the final DIV) instead of the quotient.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    // The matcher guarantees a divisor where no #DE overflow is possible.
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4);  /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
// Variable count is always taken from CL (hence eCXRegI).
instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4);  /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
// Memory form: read-modify-write straight in memory.
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Matched as a single sign-extending byte move.
instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
// Matched as a single sign-extending 16-bit move.
instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
// Variable count is always taken from CL (hence eCXRegI).
instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer reinterpreted as an int (CastP2X).
instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// Helper instructs used only via expand rules below (no match of their own).
instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Only matches when the two shift counts sum to 0 mod 32 (a true rotate).
instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Only matches when the two shift counts sum to 0 mod 32 (a true rotate).
instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// XOR with all-ones is a bitwise NOT; NOT does not touch flags, so no KILL cr.
instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------
// Conv2B is expanded as copy + NEG/ADC: dst becomes 0 iff src == 0, else 1.

// Expand helper: plain register copy (no match of its own).
instruct movI_nocopy(eRegI dst, eRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Expand helper: NEG sets carry iff dst != 0; ADC then yields 0 or 1.
instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer flavor of the same expansion.
instruct movP_nocopy(eRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// Produce all-ones (-1) if p < q, else all-zeros.
instruct cmpLTMask( eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr ) %{
  match(Set dst (CmpLTMask p q));
  effect( KILL cr );
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode( OpcRegReg(0x33,dst,dst),
              OpcRegReg(0x3B,p,q),
              setLT_reg(dst), neg_reg(dst) );
  ins_pipe( pipe_slow );
%}

// Compare-with-zero special case: the sign bit, smeared by SAR, is the mask.
instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
  match(Set dst (CmpLTMask dst zero));
  effect( DEF dst, KILL cr );
  ins_cost(100);

  format %{ "SAR $dst,31" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, 0x1F ) );
  ins_pipe( ialu_reg );
%}


// Fused form: p = (p < q) ? p : p - q + y, computed branch-free via SBB mask.
instruct cadd_cmpLTMask( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect( KILL tmp, KILL cr );
  ins_cost(400);
  // annoyingly, $tmp has no edges so you can't ask for it in
  // any format or encoding
  format %{ "SUB $p,$q\n\t"
            "SBB ECX,ECX\n\t"
            "AND ECX,$y\n\t"
            "ADD $p,ECX" %}
  ins_encode( enc_cmpLTP(p,q,y,tmp) );
  ins_pipe( pipe_cmplt );
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
  effect( USE_KILL tmp, KILL cr );
  ins_cost(400);

  format %{ "SUB $p,$q\n\t"
            "SBB ECX,ECX\n\t"
            "AND ECX,$y\n\t"
            "ADD $p,ECX" %}
  ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
%}
*/

//----------Long Instructions------------------------------------------------
// 64-bit arithmetic on 32-bit x86: low halves with the plain op, high halves
// with the carry-propagating twin (ADD/ADC, SUB/SBB).

// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long: 0 - dst, with borrow carried into the high half.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// XOR with all-ones is NOT on each half; NOT leaves flags alone, so no KILL cr.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// Small constant left shifts are done as repeated ADD/ADC (64-bit doubling).
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// A shift of >= 32 moves the low word into the high word and clears the low.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe(
pipe_slow ); 9943 %} 9944 9945 // Shift Right Long by 1-31 9946 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9947 match(Set dst (RShiftL dst cnt)); 9948 effect(KILL cr); 9949 ins_cost(200); 9950 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9951 "SAR $dst.hi,$cnt" %} 9952 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9953 ins_encode( move_long_small_shift(dst,cnt) ); 9954 ins_pipe( ialu_reg_long ); 9955 %} 9956 9957 // Shift Right Long by 32-63 9958 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9959 match(Set dst (RShiftL dst cnt)); 9960 effect(KILL cr); 9961 ins_cost(300); 9962 format %{ "MOV $dst.lo,$dst.hi\n" 9963 "\tSAR $dst.lo,$cnt-32\n" 9964 "\tSAR $dst.hi,31" %} 9965 opcode(0xC1, 0x7); /* C1 /7 ib */ 9966 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9967 ins_pipe( ialu_reg_long ); 9968 %} 9969 9970 // Shift Right arithmetic Long by variable 9971 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9972 match(Set dst (RShiftL dst shift)); 9973 effect(KILL cr); 9974 ins_cost(600); 9975 size(18); 9976 format %{ "TEST $shift,32\n\t" 9977 "JEQ,s small\n\t" 9978 "MOV $dst.lo,$dst.hi\n\t" 9979 "SAR $dst.hi,31\n" 9980 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9981 "SAR $dst.hi,$shift" %} 9982 ins_encode( shift_right_arith_long( dst, shift ) ); 9983 ins_pipe( pipe_slow ); 9984 %} 9985 9986 9987 //----------Double Instructions------------------------------------------------ 9988 // Double Math 9989 9990 // Compare & branch 9991 9992 // P6 version of float compare, sets condition codes in EFLAGS 9993 instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{ 9994 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9995 match(Set cr (CmpD src1 src2)); 9996 effect(KILL rax); 9997 ins_cost(150); 9998 format %{ "FLD $src1\n\t" 9999 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10000 "JNP exit\n\t" 10001 "MOV ah,1 // saw a NaN, set CF\n\t" 10002 "SAHF\n" 10003 "exit:\tNOP // avoid branch to branch" 
%} 10004 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10005 ins_encode( Push_Reg_D(src1), 10006 OpcP, RegOpc(src2), 10007 cmpF_P6_fixup ); 10008 ins_pipe( pipe_slow ); 10009 %} 10010 10011 instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{ 10012 predicate(VM_Version::supports_cmov() && UseSSE <=1); 10013 match(Set cr (CmpD src1 src2)); 10014 ins_cost(150); 10015 format %{ "FLD $src1\n\t" 10016 "FUCOMIP ST,$src2 // P6 instruction" %} 10017 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10018 ins_encode( Push_Reg_D(src1), 10019 OpcP, RegOpc(src2)); 10020 ins_pipe( pipe_slow ); 10021 %} 10022 10023 // Compare & branch 10024 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{ 10025 predicate(UseSSE<=1); 10026 match(Set cr (CmpD src1 src2)); 10027 effect(KILL rax); 10028 ins_cost(200); 10029 format %{ "FLD $src1\n\t" 10030 "FCOMp $src2\n\t" 10031 "FNSTSW AX\n\t" 10032 "TEST AX,0x400\n\t" 10033 "JZ,s flags\n\t" 10034 "MOV AH,1\t# unordered treat as LT\n" 10035 "flags:\tSAHF" %} 10036 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10037 ins_encode( Push_Reg_D(src1), 10038 OpcP, RegOpc(src2), 10039 fpu_flags); 10040 ins_pipe( pipe_slow ); 10041 %} 10042 10043 // Compare vs zero into -1,0,1 10044 instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{ 10045 predicate(UseSSE<=1); 10046 match(Set dst (CmpD3 src1 zero)); 10047 effect(KILL cr, KILL rax); 10048 ins_cost(280); 10049 format %{ "FTSTD $dst,$src1" %} 10050 opcode(0xE4, 0xD9); 10051 ins_encode( Push_Reg_D(src1), 10052 OpcS, OpcP, PopFPU, 10053 CmpF_Result(dst)); 10054 ins_pipe( pipe_slow ); 10055 %} 10056 10057 // Compare into -1,0,1 10058 instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{ 10059 predicate(UseSSE<=1); 10060 match(Set dst (CmpD3 src1 src2)); 10061 effect(KILL cr, KILL rax); 10062 ins_cost(300); 10063 format %{ "FCMPD $dst,$src1,$src2" %} 10064 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10065 ins_encode( Push_Reg_D(src1), 10066 
OpcP, RegOpc(src2), 10067 CmpF_Result(dst)); 10068 ins_pipe( pipe_slow ); 10069 %} 10070 10071 // float compare and set condition codes in EFLAGS by XMM regs 10072 instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{ 10073 predicate(UseSSE>=2); 10074 match(Set cr (CmpD dst src)); 10075 effect(KILL rax); 10076 ins_cost(125); 10077 format %{ "COMISD $dst,$src\n" 10078 "\tJNP exit\n" 10079 "\tMOV ah,1 // saw a NaN, set CF\n" 10080 "\tSAHF\n" 10081 "exit:\tNOP // avoid branch to branch" %} 10082 opcode(0x66, 0x0F, 0x2F); 10083 ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup); 10084 ins_pipe( pipe_slow ); 10085 %} 10086 10087 instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{ 10088 predicate(UseSSE>=2); 10089 match(Set cr (CmpD dst src)); 10090 ins_cost(100); 10091 format %{ "COMISD $dst,$src" %} 10092 opcode(0x66, 0x0F, 0x2F); 10093 ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); 10094 ins_pipe( pipe_slow ); 10095 %} 10096 10097 // float compare and set condition codes in EFLAGS by XMM regs 10098 instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{ 10099 predicate(UseSSE>=2); 10100 match(Set cr (CmpD dst (LoadD src))); 10101 effect(KILL rax); 10102 ins_cost(145); 10103 format %{ "COMISD $dst,$src\n" 10104 "\tJNP exit\n" 10105 "\tMOV ah,1 // saw a NaN, set CF\n" 10106 "\tSAHF\n" 10107 "exit:\tNOP // avoid branch to branch" %} 10108 opcode(0x66, 0x0F, 0x2F); 10109 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup); 10110 ins_pipe( pipe_slow ); 10111 %} 10112 10113 instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{ 10114 predicate(UseSSE>=2); 10115 match(Set cr (CmpD dst (LoadD src))); 10116 ins_cost(100); 10117 format %{ "COMISD $dst,$src" %} 10118 opcode(0x66, 0x0F, 0x2F); 10119 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src)); 10120 ins_pipe( pipe_slow ); 10121 %} 10122 10123 // Compare into -1,0,1 in XMM 10124 instruct cmpXD_reg(eRegI 
dst, regXD src1, regXD src2, eFlagsReg cr) %{ 10125 predicate(UseSSE>=2); 10126 match(Set dst (CmpD3 src1 src2)); 10127 effect(KILL cr); 10128 ins_cost(255); 10129 format %{ "XOR $dst,$dst\n" 10130 "\tCOMISD $src1,$src2\n" 10131 "\tJP,s nan\n" 10132 "\tJEQ,s exit\n" 10133 "\tJA,s inc\n" 10134 "nan:\tDEC $dst\n" 10135 "\tJMP,s exit\n" 10136 "inc:\tINC $dst\n" 10137 "exit:" 10138 %} 10139 opcode(0x66, 0x0F, 0x2F); 10140 ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2), 10141 CmpX_Result(dst)); 10142 ins_pipe( pipe_slow ); 10143 %} 10144 10145 // Compare into -1,0,1 in XMM and memory 10146 instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{ 10147 predicate(UseSSE>=2); 10148 match(Set dst (CmpD3 src1 (LoadD mem))); 10149 effect(KILL cr); 10150 ins_cost(275); 10151 format %{ "COMISD $src1,$mem\n" 10152 "\tMOV $dst,0\t\t# do not blow flags\n" 10153 "\tJP,s nan\n" 10154 "\tJEQ,s exit\n" 10155 "\tJA,s inc\n" 10156 "nan:\tDEC $dst\n" 10157 "\tJMP,s exit\n" 10158 "inc:\tINC $dst\n" 10159 "exit:" 10160 %} 10161 opcode(0x66, 0x0F, 0x2F); 10162 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem), 10163 LdImmI(dst,0x0), CmpX_Result(dst)); 10164 ins_pipe( pipe_slow ); 10165 %} 10166 10167 10168 instruct subD_reg(regD dst, regD src) %{ 10169 predicate (UseSSE <=1); 10170 match(Set dst (SubD dst src)); 10171 10172 format %{ "FLD $src\n\t" 10173 "DSUBp $dst,ST" %} 10174 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10175 ins_cost(150); 10176 ins_encode( Push_Reg_D(src), 10177 OpcP, RegOpc(dst) ); 10178 ins_pipe( fpu_reg_reg ); 10179 %} 10180 10181 instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 10182 predicate (UseSSE <=1); 10183 match(Set dst (RoundDouble (SubD src1 src2))); 10184 ins_cost(250); 10185 10186 format %{ "FLD $src2\n\t" 10187 "DSUB ST,$src1\n\t" 10188 "FSTP_D $dst\t# D-round" %} 10189 opcode(0xD8, 0x5); 10190 ins_encode( Push_Reg_D(src2), 10191 OpcP, RegOpc(src1), Pop_Mem_D(dst) ); 10192 ins_pipe( 
fpu_mem_reg_reg );
%}


// Subtract a memory double from a register double (x87 path).
instruct subD_reg_mem(regD dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD    $src\n\t"
            "DSUBp  $dst,ST" %}
  // tertiary 0xDD is the x87 memory-load opcode (FLD m64 = DD /0);
  // DE /5 is the reverse-subtract-and-pop used once the operand is on the stack.
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Absolute value of a double on the x87 stack.  Both operands are pinned to
// FPR1 (top of stack), so the encoding is the bare FABS instruction.
instruct absD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  // FABS = D9 E1, emitted via OpcS, OpcP (second operand first).
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Absolute value of a double in an XMM register: mask off the sign bit.
instruct absXD_reg( regXD dst ) %{
  predicate(UseSSE>=2);
  match(Set dst (AbsD dst));
  format %{ "ANDPD  $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
  ins_encode( AbsXD_encoding(dst));
  ins_pipe( pipe_slow );
%}

// Negate a double on the x87 stack (FCHS flips the sign of ST(0)).
instruct negD_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  // FCHS = D9 E0, emitted via OpcS, OpcP.
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Negate a double in an XMM register by XOR-ing with the sign-bit mask.
instruct negXD_reg( regXD dst ) %{
  predicate(UseSSE>=2);
  match(Set dst (NegD dst));
  format %{ "XORPD  $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister,
             ExternalAddress((address)double_signflip_pool));
  %}
  ins_pipe( pipe_slow );
%}

// Add two register doubles on the x87 stack: push src, then FADDP into dst.
instruct addD_reg(regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD    $src\n\t"
            "DADD   $dst,ST" %}
  size(4);
  ins_cost(150);
  // DE C0+i (FADDP ST(i),ST) — /0 selects the add form.
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


instruct addD_reg_round(stackSlotD dst, regD
src1, regD src2) %{ 10263 predicate(UseSSE<=1); 10264 match(Set dst (RoundDouble (AddD src1 src2))); 10265 ins_cost(250); 10266 10267 format %{ "FLD $src2\n\t" 10268 "DADD ST,$src1\n\t" 10269 "FSTP_D $dst\t# D-round" %} 10270 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 10271 ins_encode( Push_Reg_D(src2), 10272 OpcP, RegOpc(src1), Pop_Mem_D(dst) ); 10273 ins_pipe( fpu_mem_reg_reg ); 10274 %} 10275 10276 10277 instruct addD_reg_mem(regD dst, memory src) %{ 10278 predicate(UseSSE<=1); 10279 match(Set dst (AddD dst (LoadD src))); 10280 ins_cost(150); 10281 10282 format %{ "FLD $src\n\t" 10283 "DADDp $dst,ST" %} 10284 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 10285 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10286 OpcP, RegOpc(dst) ); 10287 ins_pipe( fpu_reg_mem ); 10288 %} 10289 10290 // add-to-memory 10291 instruct addD_mem_reg(memory dst, regD src) %{ 10292 predicate(UseSSE<=1); 10293 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 10294 ins_cost(150); 10295 10296 format %{ "FLD_D $dst\n\t" 10297 "DADD ST,$src\n\t" 10298 "FST_D $dst" %} 10299 opcode(0xDD, 0x0); 10300 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 10301 Opcode(0xD8), RegOpc(src), 10302 set_instruction_start, 10303 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 10304 ins_pipe( fpu_reg_mem ); 10305 %} 10306 10307 instruct addD_reg_imm1(regD dst, immD1 con) %{ 10308 predicate(UseSSE<=1); 10309 match(Set dst (AddD dst con)); 10310 ins_cost(125); 10311 format %{ "FLD1\n\t" 10312 "DADDp $dst,ST" %} 10313 ins_encode %{ 10314 __ fld1(); 10315 __ faddp($dst$$reg); 10316 %} 10317 ins_pipe(fpu_reg); 10318 %} 10319 10320 instruct addD_reg_imm(regD dst, immD con) %{ 10321 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 10322 match(Set dst (AddD dst con)); 10323 ins_cost(200); 10324 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 10325 "DADDp $dst,ST" %} 10326 ins_encode %{ 10327 __ fld_d($constantaddress($con)); 
10328 __ faddp($dst$$reg); 10329 %} 10330 ins_pipe(fpu_reg_mem); 10331 %} 10332 10333 instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{ 10334 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 10335 match(Set dst (RoundDouble (AddD src con))); 10336 ins_cost(200); 10337 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 10338 "DADD ST,$src\n\t" 10339 "FSTP_D $dst\t# D-round" %} 10340 ins_encode %{ 10341 __ fld_d($constantaddress($con)); 10342 __ fadd($src$$reg); 10343 __ fstp_d(Address(rsp, $dst$$disp)); 10344 %} 10345 ins_pipe(fpu_mem_reg_con); 10346 %} 10347 10348 // Add two double precision floating point values in xmm 10349 instruct addXD_reg(regXD dst, regXD src) %{ 10350 predicate(UseSSE>=2); 10351 match(Set dst (AddD dst src)); 10352 format %{ "ADDSD $dst,$src" %} 10353 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src)); 10354 ins_pipe( pipe_slow ); 10355 %} 10356 10357 instruct addXD_imm(regXD dst, immXD con) %{ 10358 predicate(UseSSE>=2); 10359 match(Set dst (AddD dst con)); 10360 format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 10361 ins_encode %{ 10362 __ addsd($dst$$XMMRegister, $constantaddress($con)); 10363 %} 10364 ins_pipe(pipe_slow); 10365 %} 10366 10367 instruct addXD_mem(regXD dst, memory mem) %{ 10368 predicate(UseSSE>=2); 10369 match(Set dst (AddD dst (LoadD mem))); 10370 format %{ "ADDSD $dst,$mem" %} 10371 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem)); 10372 ins_pipe( pipe_slow ); 10373 %} 10374 10375 // Sub two double precision floating point values in xmm 10376 instruct subXD_reg(regXD dst, regXD src) %{ 10377 predicate(UseSSE>=2); 10378 match(Set dst (SubD dst src)); 10379 format %{ "SUBSD $dst,$src" %} 10380 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src)); 10381 ins_pipe( pipe_slow ); 10382 %} 10383 10384 instruct 
subXD_imm(regXD dst, immXD con) %{ 10385 predicate(UseSSE>=2); 10386 match(Set dst (SubD dst con)); 10387 format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 10388 ins_encode %{ 10389 __ subsd($dst$$XMMRegister, $constantaddress($con)); 10390 %} 10391 ins_pipe(pipe_slow); 10392 %} 10393 10394 instruct subXD_mem(regXD dst, memory mem) %{ 10395 predicate(UseSSE>=2); 10396 match(Set dst (SubD dst (LoadD mem))); 10397 format %{ "SUBSD $dst,$mem" %} 10398 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem)); 10399 ins_pipe( pipe_slow ); 10400 %} 10401 10402 // Mul two double precision floating point values in xmm 10403 instruct mulXD_reg(regXD dst, regXD src) %{ 10404 predicate(UseSSE>=2); 10405 match(Set dst (MulD dst src)); 10406 format %{ "MULSD $dst,$src" %} 10407 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src)); 10408 ins_pipe( pipe_slow ); 10409 %} 10410 10411 instruct mulXD_imm(regXD dst, immXD con) %{ 10412 predicate(UseSSE>=2); 10413 match(Set dst (MulD dst con)); 10414 format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 10415 ins_encode %{ 10416 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 10417 %} 10418 ins_pipe(pipe_slow); 10419 %} 10420 10421 instruct mulXD_mem(regXD dst, memory mem) %{ 10422 predicate(UseSSE>=2); 10423 match(Set dst (MulD dst (LoadD mem))); 10424 format %{ "MULSD $dst,$mem" %} 10425 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem)); 10426 ins_pipe( pipe_slow ); 10427 %} 10428 10429 // Div two double precision floating point values in xmm 10430 instruct divXD_reg(regXD dst, regXD src) %{ 10431 predicate(UseSSE>=2); 10432 match(Set dst (DivD dst src)); 10433 format %{ "DIVSD $dst,$src" %} 10434 opcode(0xF2, 0x0F, 0x5E); 10435 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src)); 10436 ins_pipe( pipe_slow ); 10437 %} 10438 10439 instruct divXD_imm(regXD dst, immXD con) %{ 10440 
predicate(UseSSE>=2); 10441 match(Set dst (DivD dst con)); 10442 format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 10443 ins_encode %{ 10444 __ divsd($dst$$XMMRegister, $constantaddress($con)); 10445 %} 10446 ins_pipe(pipe_slow); 10447 %} 10448 10449 instruct divXD_mem(regXD dst, memory mem) %{ 10450 predicate(UseSSE>=2); 10451 match(Set dst (DivD dst (LoadD mem))); 10452 format %{ "DIVSD $dst,$mem" %} 10453 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem)); 10454 ins_pipe( pipe_slow ); 10455 %} 10456 10457 10458 instruct mulD_reg(regD dst, regD src) %{ 10459 predicate(UseSSE<=1); 10460 match(Set dst (MulD dst src)); 10461 format %{ "FLD $src\n\t" 10462 "DMULp $dst,ST" %} 10463 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 10464 ins_cost(150); 10465 ins_encode( Push_Reg_D(src), 10466 OpcP, RegOpc(dst) ); 10467 ins_pipe( fpu_reg_reg ); 10468 %} 10469 10470 // Strict FP instruction biases argument before multiply then 10471 // biases result to avoid double rounding of subnormals. 
10472 // 10473 // scale arg1 by multiplying arg1 by 2^(-15360) 10474 // load arg2 10475 // multiply scaled arg1 by arg2 10476 // rescale product by 2^(15360) 10477 // 10478 instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{ 10479 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 10480 match(Set dst (MulD dst src)); 10481 ins_cost(1); // Select this instruction for all strict FP double multiplies 10482 10483 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 10484 "DMULp $dst,ST\n\t" 10485 "FLD $src\n\t" 10486 "DMULp $dst,ST\n\t" 10487 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 10488 "DMULp $dst,ST\n\t" %} 10489 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 10490 ins_encode( strictfp_bias1(dst), 10491 Push_Reg_D(src), 10492 OpcP, RegOpc(dst), 10493 strictfp_bias2(dst) ); 10494 ins_pipe( fpu_reg_reg ); 10495 %} 10496 10497 instruct mulD_reg_imm(regD dst, immD con) %{ 10498 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 10499 match(Set dst (MulD dst con)); 10500 ins_cost(200); 10501 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 10502 "DMULp $dst,ST" %} 10503 ins_encode %{ 10504 __ fld_d($constantaddress($con)); 10505 __ fmulp($dst$$reg); 10506 %} 10507 ins_pipe(fpu_reg_mem); 10508 %} 10509 10510 10511 instruct mulD_reg_mem(regD dst, memory src) %{ 10512 predicate( UseSSE<=1 ); 10513 match(Set dst (MulD dst (LoadD src))); 10514 ins_cost(200); 10515 format %{ "FLD_D $src\n\t" 10516 "DMULp $dst,ST" %} 10517 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 10518 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10519 OpcP, RegOpc(dst) ); 10520 ins_pipe( fpu_reg_mem ); 10521 %} 10522 10523 // 10524 // Cisc-alternate to reg-reg multiply 10525 instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{ 10526 predicate( UseSSE<=1 ); 10527 match(Set dst (MulD src (LoadD mem))); 10528 ins_cost(250); 10529 format %{ 
"FLD_D $mem\n\t" 10530 "DMUL ST,$src\n\t" 10531 "FSTP_D $dst" %} 10532 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 10533 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 10534 OpcReg_F(src), 10535 Pop_Reg_D(dst) ); 10536 ins_pipe( fpu_reg_reg_mem ); 10537 %} 10538 10539 10540 // MACRO3 -- addD a mulD 10541 // This instruction is a '2-address' instruction in that the result goes 10542 // back to src2. This eliminates a move from the macro; possibly the 10543 // register allocator will have to add it back (and maybe not). 10544 instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{ 10545 predicate( UseSSE<=1 ); 10546 match(Set src2 (AddD (MulD src0 src1) src2)); 10547 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 10548 "DMUL ST,$src1\n\t" 10549 "DADDp $src2,ST" %} 10550 ins_cost(250); 10551 opcode(0xDD); /* LoadD DD /0 */ 10552 ins_encode( Push_Reg_F(src0), 10553 FMul_ST_reg(src1), 10554 FAddP_reg_ST(src2) ); 10555 ins_pipe( fpu_reg_reg_reg ); 10556 %} 10557 10558 10559 // MACRO3 -- subD a mulD 10560 instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{ 10561 predicate( UseSSE<=1 ); 10562 match(Set src2 (SubD (MulD src0 src1) src2)); 10563 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 10564 "DMUL ST,$src1\n\t" 10565 "DSUBRp $src2,ST" %} 10566 ins_cost(250); 10567 ins_encode( Push_Reg_F(src0), 10568 FMul_ST_reg(src1), 10569 Opcode(0xDE), Opc_plus(0xE0,src2)); 10570 ins_pipe( fpu_reg_reg_reg ); 10571 %} 10572 10573 10574 instruct divD_reg(regD dst, regD src) %{ 10575 predicate( UseSSE<=1 ); 10576 match(Set dst (DivD dst src)); 10577 10578 format %{ "FLD $src\n\t" 10579 "FDIVp $dst,ST" %} 10580 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10581 ins_cost(150); 10582 ins_encode( Push_Reg_D(src), 10583 OpcP, RegOpc(dst) ); 10584 ins_pipe( fpu_reg_reg ); 10585 %} 10586 10587 // Strict FP instruction biases argument before division then 10588 // biases result, to avoid double rounding of subnormals. 
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// Strict-FP double divide on the x87 stack.  Biases the dividend before the
// divide and rescales the quotient afterwards so subnormal results are not
// double-rounded (see the matching strictfp_mulD_reg above).
// NOTE: the block previously carried two predicate() clauses — a bare
// "UseSSE<=1" and the full strict-FP test.  A match rule takes exactly one
// predicate; keep the combined strict-FP predicate, mirroring
// strictfp_mulD_reg.
instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all strict FP double divides

  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "FDIVp  $dst,ST\n\t"
            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  // FDIVP = DE F8+i (register form) / DE /7.
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_D(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Non-strict double divide whose result is immediately rounded to a stack
// slot.  Explicitly excluded for strict methods (the strict form above wins).
instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD    $src1\n\t"
            "FDIV   ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  // FDIV ST,ST(i) = D8 F0+i / D8 /6.
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Double remainder (drem) on the x87 stack.  Loops on FPREM inside
// emitModD(), which clobbers EAX and EFLAGS — hence the KILLs.
instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_D(dst, src),
             emitModD(),
             Push_Result_Mod_D(src),
             Pop_Reg_D(dst));
  ins_pipe( pipe_slow );
%}

// Double remainder for SSE2: bounce both operands through the x87 stack,
// since there is no SSE remainder instruction.
instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL
rax, KILL cr); 10647 10648 format %{ "SUB ESP,8\t # DMOD\n" 10649 "\tMOVSD [ESP+0],$src1\n" 10650 "\tFLD_D [ESP+0]\n" 10651 "\tMOVSD [ESP+0],$src0\n" 10652 "\tFLD_D [ESP+0]\n" 10653 "loop:\tFPREM\n" 10654 "\tFWAIT\n" 10655 "\tFNSTSW AX\n" 10656 "\tSAHF\n" 10657 "\tJP loop\n" 10658 "\tFSTP_D [ESP+0]\n" 10659 "\tMOVSD $dst,[ESP+0]\n" 10660 "\tADD ESP,8\n" 10661 "\tFSTP ST0\t # Restore FPU Stack" 10662 %} 10663 ins_cost(250); 10664 ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU); 10665 ins_pipe( pipe_slow ); 10666 %} 10667 10668 instruct sinD_reg(regDPR1 dst, regDPR1 src) %{ 10669 predicate (UseSSE<=1); 10670 match(Set dst (SinD src)); 10671 ins_cost(1800); 10672 format %{ "DSIN $dst" %} 10673 opcode(0xD9, 0xFE); 10674 ins_encode( OpcP, OpcS ); 10675 ins_pipe( pipe_slow ); 10676 %} 10677 10678 instruct sinXD_reg(regXD dst, eFlagsReg cr) %{ 10679 predicate (UseSSE>=2); 10680 match(Set dst (SinD dst)); 10681 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 10682 ins_cost(1800); 10683 format %{ "DSIN $dst" %} 10684 opcode(0xD9, 0xFE); 10685 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); 10686 ins_pipe( pipe_slow ); 10687 %} 10688 10689 instruct cosD_reg(regDPR1 dst, regDPR1 src) %{ 10690 predicate (UseSSE<=1); 10691 match(Set dst (CosD src)); 10692 ins_cost(1800); 10693 format %{ "DCOS $dst" %} 10694 opcode(0xD9, 0xFF); 10695 ins_encode( OpcP, OpcS ); 10696 ins_pipe( pipe_slow ); 10697 %} 10698 10699 instruct cosXD_reg(regXD dst, eFlagsReg cr) %{ 10700 predicate (UseSSE>=2); 10701 match(Set dst (CosD dst)); 10702 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 10703 ins_cost(1800); 10704 format %{ "DCOS $dst" %} 10705 opcode(0xD9, 0xFF); 10706 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); 10707 ins_pipe( pipe_slow ); 10708 %} 10709 10710 instruct tanD_reg(regDPR1 dst, regDPR1 src) %{ 10711 predicate (UseSSE<=1); 10712 match(Set dst(TanD src)); 10713 format %{ "DTAN $dst" 
%} 10714 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan 10715 Opcode(0xDD), Opcode(0xD8)); // fstp st 10716 ins_pipe( pipe_slow ); 10717 %} 10718 10719 instruct tanXD_reg(regXD dst, eFlagsReg cr) %{ 10720 predicate (UseSSE>=2); 10721 match(Set dst(TanD dst)); 10722 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 10723 format %{ "DTAN $dst" %} 10724 ins_encode( Push_SrcXD(dst), 10725 Opcode(0xD9), Opcode(0xF2), // fptan 10726 Opcode(0xDD), Opcode(0xD8), // fstp st 10727 Push_ResultXD(dst) ); 10728 ins_pipe( pipe_slow ); 10729 %} 10730 10731 instruct atanD_reg(regD dst, regD src) %{ 10732 predicate (UseSSE<=1); 10733 match(Set dst(AtanD dst src)); 10734 format %{ "DATA $dst,$src" %} 10735 opcode(0xD9, 0xF3); 10736 ins_encode( Push_Reg_D(src), 10737 OpcP, OpcS, RegOpc(dst) ); 10738 ins_pipe( pipe_slow ); 10739 %} 10740 10741 instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 10742 predicate (UseSSE>=2); 10743 match(Set dst(AtanD dst src)); 10744 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 10745 format %{ "DATA $dst,$src" %} 10746 opcode(0xD9, 0xF3); 10747 ins_encode( Push_SrcXD(src), 10748 OpcP, OpcS, Push_ResultXD(dst) ); 10749 ins_pipe( pipe_slow ); 10750 %} 10751 10752 instruct sqrtD_reg(regD dst, regD src) %{ 10753 predicate (UseSSE<=1); 10754 match(Set dst (SqrtD src)); 10755 format %{ "DSQRT $dst,$src" %} 10756 opcode(0xFA, 0xD9); 10757 ins_encode( Push_Reg_D(src), 10758 OpcS, OpcP, Pop_Reg_D(dst) ); 10759 ins_pipe( pipe_slow ); 10760 %} 10761 10762 instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 10763 predicate (UseSSE<=1); 10764 match(Set Y (PowD X Y)); // Raise X to the Yth power 10765 effect(KILL rax, KILL rbx, KILL rcx); 10766 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t" 10767 "FLD_D $X\n\t" 10768 "FYL2X \t\t\t# Q=Y*ln2(X)\n\t" 10769 10770 "FDUP \t\t\t# Q Q\n\t" 10771 "FRNDINT\t\t\t# int(Q) Q\n\t" 10772 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t" 10773 "FISTP dword 
[ESP]\n\t" 10774 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t" 10775 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t" 10776 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead 10777 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t" 10778 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t" 10779 "ADD EAX,1023\t\t# Double exponent bias\n\t" 10780 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t" 10781 "SHL EAX,20\t\t# Shift exponent into place\n\t" 10782 "TEST EBX,ECX\t\t# Check for overflow\n\t" 10783 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t" 10784 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t" 10785 "MOV [ESP+0],0\n\t" 10786 "FMUL ST(0),[ESP+0]\t# Scale\n\t" 10787 10788 "ADD ESP,8" 10789 %} 10790 ins_encode( push_stack_temp_qword, 10791 Push_Reg_D(X), 10792 Opcode(0xD9), Opcode(0xF1), // fyl2x 10793 pow_exp_core_encoding, 10794 pop_stack_temp_qword); 10795 ins_pipe( pipe_slow ); 10796 %} 10797 10798 instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{ 10799 predicate (UseSSE>=2); 10800 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 10801 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx ); 10802 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t" 10803 "MOVSD [ESP],$src1\n\t" 10804 "FLD FPR1,$src1\n\t" 10805 "MOVSD [ESP],$src0\n\t" 10806 "FLD FPR1,$src0\n\t" 10807 "FYL2X \t\t\t# Q=Y*ln2(X)\n\t" 10808 10809 "FDUP \t\t\t# Q Q\n\t" 10810 "FRNDINT\t\t\t# int(Q) Q\n\t" 10811 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t" 10812 "FISTP dword [ESP]\n\t" 10813 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t" 10814 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t" 10815 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead 10816 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t" 10817 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t" 10818 "ADD EAX,1023\t\t# Double exponent bias\n\t" 10819 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t" 10820 "SHL EAX,20\t\t# Shift exponent into place\n\t" 10821 "TEST EBX,ECX\t\t# Check for 
overflow\n\t" 10822 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t" 10823 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t" 10824 "MOV [ESP+0],0\n\t" 10825 "FMUL ST(0),[ESP+0]\t# Scale\n\t" 10826 10827 "FST_D [ESP]\n\t" 10828 "MOVSD $dst,[ESP]\n\t" 10829 "ADD ESP,8" 10830 %} 10831 ins_encode( push_stack_temp_qword, 10832 push_xmm_to_fpr1(src1), 10833 push_xmm_to_fpr1(src0), 10834 Opcode(0xD9), Opcode(0xF1), // fyl2x 10835 pow_exp_core_encoding, 10836 Push_ResultXD(dst) ); 10837 ins_pipe( pipe_slow ); 10838 %} 10839 10840 10841 instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 10842 predicate (UseSSE<=1); 10843 match(Set dpr1 (ExpD dpr1)); 10844 effect(KILL rax, KILL rbx, KILL rcx); 10845 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding" 10846 "FLDL2E \t\t\t# Ld log2(e) X\n\t" 10847 "FMULP \t\t\t# Q=X*log2(e)\n\t" 10848 10849 "FDUP \t\t\t# Q Q\n\t" 10850 "FRNDINT\t\t\t# int(Q) Q\n\t" 10851 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t" 10852 "FISTP dword [ESP]\n\t" 10853 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t" 10854 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t" 10855 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead 10856 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t" 10857 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t" 10858 "ADD EAX,1023\t\t# Double exponent bias\n\t" 10859 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t" 10860 "SHL EAX,20\t\t# Shift exponent into place\n\t" 10861 "TEST EBX,ECX\t\t# Check for overflow\n\t" 10862 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t" 10863 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t" 10864 "MOV [ESP+0],0\n\t" 10865 "FMUL ST(0),[ESP+0]\t# Scale\n\t" 10866 10867 "ADD ESP,8" 10868 %} 10869 ins_encode( push_stack_temp_qword, 10870 Opcode(0xD9), Opcode(0xEA), // fldl2e 10871 Opcode(0xDE), Opcode(0xC9), // fmulp 10872 pow_exp_core_encoding, 10873 pop_stack_temp_qword); 10874 ins_pipe( pipe_slow ); 10875 %} 10876 10877 instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, 
eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 10878 predicate (UseSSE>=2); 10879 match(Set dst (ExpD src)); 10880 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx); 10881 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t" 10882 "MOVSD [ESP],$src\n\t" 10883 "FLDL2E \t\t\t# Ld log2(e) X\n\t" 10884 "FMULP \t\t\t# Q=X*log2(e) X\n\t" 10885 10886 "FDUP \t\t\t# Q Q\n\t" 10887 "FRNDINT\t\t\t# int(Q) Q\n\t" 10888 "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t" 10889 "FISTP dword [ESP]\n\t" 10890 "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t" 10891 "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t" 10892 "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead 10893 "MOV EAX,[ESP]\t# Pick up int(Q)\n\t" 10894 "MOV ECX,0xFFFFF800\t# Overflow mask\n\t" 10895 "ADD EAX,1023\t\t# Double exponent bias\n\t" 10896 "MOV EBX,EAX\t\t# Preshifted biased expo\n\t" 10897 "SHL EAX,20\t\t# Shift exponent into place\n\t" 10898 "TEST EBX,ECX\t\t# Check for overflow\n\t" 10899 "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t" 10900 "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t" 10901 "MOV [ESP+0],0\n\t" 10902 "FMUL ST(0),[ESP+0]\t# Scale\n\t" 10903 10904 "FST_D [ESP]\n\t" 10905 "MOVSD $dst,[ESP]\n\t" 10906 "ADD ESP,8" 10907 %} 10908 ins_encode( Push_SrcXD(src), 10909 Opcode(0xD9), Opcode(0xEA), // fldl2e 10910 Opcode(0xDE), Opcode(0xC9), // fmulp 10911 pow_exp_core_encoding, 10912 Push_ResultXD(dst) ); 10913 ins_pipe( pipe_slow ); 10914 %} 10915 10916 10917 10918 instruct log10D_reg(regDPR1 dst, regDPR1 src) %{ 10919 predicate (UseSSE<=1); 10920 // The source Double operand on FPU stack 10921 match(Set dst (Log10D src)); 10922 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 10923 // fxch ; swap ST(0) with ST(1) 10924 // fyl2x ; compute log_10(2) * log_2(x) 10925 format %{ "FLDLG2 \t\t\t#Log10\n\t" 10926 "FXCH \n\t" 10927 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 10928 %} 10929 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 10930 Opcode(0xD9), Opcode(0xC9), // fxch 10931 
Opcode(0xD9), Opcode(0xF1)); // fyl2x 10932 10933 ins_pipe( pipe_slow ); 10934 %} 10935 10936 instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 10937 predicate (UseSSE>=2); 10938 effect(KILL cr); 10939 match(Set dst (Log10D src)); 10940 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 10941 // fyl2x ; compute log_10(2) * log_2(x) 10942 format %{ "FLDLG2 \t\t\t#Log10\n\t" 10943 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 10944 %} 10945 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 10946 Push_SrcXD(src), 10947 Opcode(0xD9), Opcode(0xF1), // fyl2x 10948 Push_ResultXD(dst)); 10949 10950 ins_pipe( pipe_slow ); 10951 %} 10952 10953 instruct logD_reg(regDPR1 dst, regDPR1 src) %{ 10954 predicate (UseSSE<=1); 10955 // The source Double operand on FPU stack 10956 match(Set dst (LogD src)); 10957 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 10958 // fxch ; swap ST(0) with ST(1) 10959 // fyl2x ; compute log_e(2) * log_2(x) 10960 format %{ "FLDLN2 \t\t\t#Log_e\n\t" 10961 "FXCH \n\t" 10962 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 10963 %} 10964 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 10965 Opcode(0xD9), Opcode(0xC9), // fxch 10966 Opcode(0xD9), Opcode(0xF1)); // fyl2x 10967 10968 ins_pipe( pipe_slow ); 10969 %} 10970 10971 instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 10972 predicate (UseSSE>=2); 10973 effect(KILL cr); 10974 // The source and result Double operands in XMM registers 10975 match(Set dst (LogD src)); 10976 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 10977 // fyl2x ; compute log_e(2) * log_2(x) 10978 format %{ "FLDLN2 \t\t\t#Log_e\n\t" 10979 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 10980 %} 10981 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 10982 Push_SrcXD(src), 10983 Opcode(0xD9), Opcode(0xF1), // fyl2x 10984 Push_ResultXD(dst)); 10985 ins_pipe( pipe_slow ); 10986 %} 10987 10988 //-------------Float Instructions------------------------------- 10989 // Float Math 10990 10991 // Code for float 
compare: 10992 // fcompp(); 10993 // fwait(); fnstsw_ax(); 10994 // sahf(); 10995 // movl(dst, unordered_result); 10996 // jcc(Assembler::parity, exit); 10997 // movl(dst, less_result); 10998 // jcc(Assembler::below, exit); 10999 // movl(dst, equal_result); 11000 // jcc(Assembler::equal, exit); 11001 // movl(dst, greater_result); 11002 // exit: 11003 11004 // P6 version of float compare, sets condition codes in EFLAGS 11005 instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{ 11006 predicate(VM_Version::supports_cmov() && UseSSE == 0); 11007 match(Set cr (CmpF src1 src2)); 11008 effect(KILL rax); 11009 ins_cost(150); 11010 format %{ "FLD $src1\n\t" 11011 "FUCOMIP ST,$src2 // P6 instruction\n\t" 11012 "JNP exit\n\t" 11013 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 11014 "SAHF\n" 11015 "exit:\tNOP // avoid branch to branch" %} 11016 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 11017 ins_encode( Push_Reg_D(src1), 11018 OpcP, RegOpc(src2), 11019 cmpF_P6_fixup ); 11020 ins_pipe( pipe_slow ); 11021 %} 11022 11023 instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{ 11024 predicate(VM_Version::supports_cmov() && UseSSE == 0); 11025 match(Set cr (CmpF src1 src2)); 11026 ins_cost(100); 11027 format %{ "FLD $src1\n\t" 11028 "FUCOMIP ST,$src2 // P6 instruction" %} 11029 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 11030 ins_encode( Push_Reg_D(src1), 11031 OpcP, RegOpc(src2)); 11032 ins_pipe( pipe_slow ); 11033 %} 11034 11035 11036 // Compare & branch 11037 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{ 11038 predicate(UseSSE == 0); 11039 match(Set cr (CmpF src1 src2)); 11040 effect(KILL rax); 11041 ins_cost(200); 11042 format %{ "FLD $src1\n\t" 11043 "FCOMp $src2\n\t" 11044 "FNSTSW AX\n\t" 11045 "TEST AX,0x400\n\t" 11046 "JZ,s flags\n\t" 11047 "MOV AH,1\t# unordered treat as LT\n" 11048 "flags:\tSAHF" %} 11049 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 11050 ins_encode( Push_Reg_D(src1), 11051 OpcP, RegOpc(src2), 11052 
fpu_flags); 11053 ins_pipe( pipe_slow ); 11054 %} 11055 11056 // Compare vs zero into -1,0,1 11057 instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{ 11058 predicate(UseSSE == 0); 11059 match(Set dst (CmpF3 src1 zero)); 11060 effect(KILL cr, KILL rax); 11061 ins_cost(280); 11062 format %{ "FTSTF $dst,$src1" %} 11063 opcode(0xE4, 0xD9); 11064 ins_encode( Push_Reg_D(src1), 11065 OpcS, OpcP, PopFPU, 11066 CmpF_Result(dst)); 11067 ins_pipe( pipe_slow ); 11068 %} 11069 11070 // Compare into -1,0,1 11071 instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ 11072 predicate(UseSSE == 0); 11073 match(Set dst (CmpF3 src1 src2)); 11074 effect(KILL cr, KILL rax); 11075 ins_cost(300); 11076 format %{ "FCMPF $dst,$src1,$src2" %} 11077 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 11078 ins_encode( Push_Reg_D(src1), 11079 OpcP, RegOpc(src2), 11080 CmpF_Result(dst)); 11081 ins_pipe( pipe_slow ); 11082 %} 11083 11084 // float compare and set condition codes in EFLAGS by XMM regs 11085 instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{ 11086 predicate(UseSSE>=1); 11087 match(Set cr (CmpF dst src)); 11088 effect(KILL rax); 11089 ins_cost(145); 11090 format %{ "COMISS $dst,$src\n" 11091 "\tJNP exit\n" 11092 "\tMOV ah,1 // saw a NaN, set CF\n" 11093 "\tSAHF\n" 11094 "exit:\tNOP // avoid branch to branch" %} 11095 opcode(0x0F, 0x2F); 11096 ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup); 11097 ins_pipe( pipe_slow ); 11098 %} 11099 11100 instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{ 11101 predicate(UseSSE>=1); 11102 match(Set cr (CmpF dst src)); 11103 ins_cost(100); 11104 format %{ "COMISS $dst,$src" %} 11105 opcode(0x0F, 0x2F); 11106 ins_encode(OpcP, OpcS, RegReg(dst, src)); 11107 ins_pipe( pipe_slow ); 11108 %} 11109 11110 // float compare and set condition codes in EFLAGS by XMM regs 11111 instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{ 11112 predicate(UseSSE>=1); 11113 
match(Set cr (CmpF dst (LoadF src))); 11114 effect(KILL rax); 11115 ins_cost(165); 11116 format %{ "COMISS $dst,$src\n" 11117 "\tJNP exit\n" 11118 "\tMOV ah,1 // saw a NaN, set CF\n" 11119 "\tSAHF\n" 11120 "exit:\tNOP // avoid branch to branch" %} 11121 opcode(0x0F, 0x2F); 11122 ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup); 11123 ins_pipe( pipe_slow ); 11124 %} 11125 11126 instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{ 11127 predicate(UseSSE>=1); 11128 match(Set cr (CmpF dst (LoadF src))); 11129 ins_cost(100); 11130 format %{ "COMISS $dst,$src" %} 11131 opcode(0x0F, 0x2F); 11132 ins_encode(OpcP, OpcS, RegMem(dst, src)); 11133 ins_pipe( pipe_slow ); 11134 %} 11135 11136 // Compare into -1,0,1 in XMM 11137 instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{ 11138 predicate(UseSSE>=1); 11139 match(Set dst (CmpF3 src1 src2)); 11140 effect(KILL cr); 11141 ins_cost(255); 11142 format %{ "XOR $dst,$dst\n" 11143 "\tCOMISS $src1,$src2\n" 11144 "\tJP,s nan\n" 11145 "\tJEQ,s exit\n" 11146 "\tJA,s inc\n" 11147 "nan:\tDEC $dst\n" 11148 "\tJMP,s exit\n" 11149 "inc:\tINC $dst\n" 11150 "exit:" 11151 %} 11152 opcode(0x0F, 0x2F); 11153 ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst)); 11154 ins_pipe( pipe_slow ); 11155 %} 11156 11157 // Compare into -1,0,1 in XMM and memory 11158 instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{ 11159 predicate(UseSSE>=1); 11160 match(Set dst (CmpF3 src1 (LoadF mem))); 11161 effect(KILL cr); 11162 ins_cost(275); 11163 format %{ "COMISS $src1,$mem\n" 11164 "\tMOV $dst,0\t\t# do not blow flags\n" 11165 "\tJP,s nan\n" 11166 "\tJEQ,s exit\n" 11167 "\tJA,s inc\n" 11168 "nan:\tDEC $dst\n" 11169 "\tJMP,s exit\n" 11170 "inc:\tINC $dst\n" 11171 "exit:" 11172 %} 11173 opcode(0x0F, 0x2F); 11174 ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst)); 11175 ins_pipe( pipe_slow ); 11176 %} 11177 11178 // Spill to obtain 24-bit precision 11179 instruct 
subF24_reg(stackSlotF dst, regF src1, regF src2) %{ 11180 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 11181 match(Set dst (SubF src1 src2)); 11182 11183 format %{ "FSUB $dst,$src1 - $src2" %} 11184 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 11185 ins_encode( Push_Reg_F(src1), 11186 OpcReg_F(src2), 11187 Pop_Mem_F(dst) ); 11188 ins_pipe( fpu_mem_reg_reg ); 11189 %} 11190 // 11191 // This instruction does not round to 24-bits 11192 instruct subF_reg(regF dst, regF src) %{ 11193 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11194 match(Set dst (SubF dst src)); 11195 11196 format %{ "FSUB $dst,$src" %} 11197 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 11198 ins_encode( Push_Reg_F(src), 11199 OpcP, RegOpc(dst) ); 11200 ins_pipe( fpu_reg_reg ); 11201 %} 11202 11203 // Spill to obtain 24-bit precision 11204 instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{ 11205 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 11206 match(Set dst (AddF src1 src2)); 11207 11208 format %{ "FADD $dst,$src1,$src2" %} 11209 opcode(0xD8, 0x0); /* D8 C0+i */ 11210 ins_encode( Push_Reg_F(src2), 11211 OpcReg_F(src1), 11212 Pop_Mem_F(dst) ); 11213 ins_pipe( fpu_mem_reg_reg ); 11214 %} 11215 // 11216 // This instruction does not round to 24-bits 11217 instruct addF_reg(regF dst, regF src) %{ 11218 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11219 match(Set dst (AddF dst src)); 11220 11221 format %{ "FLD $src\n\t" 11222 "FADDp $dst,ST" %} 11223 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 11224 ins_encode( Push_Reg_F(src), 11225 OpcP, RegOpc(dst) ); 11226 ins_pipe( fpu_reg_reg ); 11227 %} 11228 11229 // Add two single precision floating point values in xmm 11230 instruct addX_reg(regX dst, regX src) %{ 11231 predicate(UseSSE>=1); 11232 match(Set dst (AddF dst src)); 11233 format %{ "ADDSS $dst,$src" %} 11234 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src)); 11235 
ins_pipe( pipe_slow ); 11236 %} 11237 11238 instruct addX_imm(regX dst, immXF con) %{ 11239 predicate(UseSSE>=1); 11240 match(Set dst (AddF dst con)); 11241 format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 11242 ins_encode %{ 11243 __ addss($dst$$XMMRegister, $constantaddress($con)); 11244 %} 11245 ins_pipe(pipe_slow); 11246 %} 11247 11248 instruct addX_mem(regX dst, memory mem) %{ 11249 predicate(UseSSE>=1); 11250 match(Set dst (AddF dst (LoadF mem))); 11251 format %{ "ADDSS $dst,$mem" %} 11252 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem)); 11253 ins_pipe( pipe_slow ); 11254 %} 11255 11256 // Subtract two single precision floating point values in xmm 11257 instruct subX_reg(regX dst, regX src) %{ 11258 predicate(UseSSE>=1); 11259 match(Set dst (SubF dst src)); 11260 format %{ "SUBSS $dst,$src" %} 11261 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src)); 11262 ins_pipe( pipe_slow ); 11263 %} 11264 11265 instruct subX_imm(regX dst, immXF con) %{ 11266 predicate(UseSSE>=1); 11267 match(Set dst (SubF dst con)); 11268 format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 11269 ins_encode %{ 11270 __ subss($dst$$XMMRegister, $constantaddress($con)); 11271 %} 11272 ins_pipe(pipe_slow); 11273 %} 11274 11275 instruct subX_mem(regX dst, memory mem) %{ 11276 predicate(UseSSE>=1); 11277 match(Set dst (SubF dst (LoadF mem))); 11278 format %{ "SUBSS $dst,$mem" %} 11279 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem)); 11280 ins_pipe( pipe_slow ); 11281 %} 11282 11283 // Multiply two single precision floating point values in xmm 11284 instruct mulX_reg(regX dst, regX src) %{ 11285 predicate(UseSSE>=1); 11286 match(Set dst (MulF dst src)); 11287 format %{ "MULSS $dst,$src" %} 11288 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src)); 11289 ins_pipe( pipe_slow ); 11290 %} 11291 11292 instruct mulX_imm(regX dst, immXF con) %{ 
11293 predicate(UseSSE>=1); 11294 match(Set dst (MulF dst con)); 11295 format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 11296 ins_encode %{ 11297 __ mulss($dst$$XMMRegister, $constantaddress($con)); 11298 %} 11299 ins_pipe(pipe_slow); 11300 %} 11301 11302 instruct mulX_mem(regX dst, memory mem) %{ 11303 predicate(UseSSE>=1); 11304 match(Set dst (MulF dst (LoadF mem))); 11305 format %{ "MULSS $dst,$mem" %} 11306 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem)); 11307 ins_pipe( pipe_slow ); 11308 %} 11309 11310 // Divide two single precision floating point values in xmm 11311 instruct divX_reg(regX dst, regX src) %{ 11312 predicate(UseSSE>=1); 11313 match(Set dst (DivF dst src)); 11314 format %{ "DIVSS $dst,$src" %} 11315 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src)); 11316 ins_pipe( pipe_slow ); 11317 %} 11318 11319 instruct divX_imm(regX dst, immXF con) %{ 11320 predicate(UseSSE>=1); 11321 match(Set dst (DivF dst con)); 11322 format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 11323 ins_encode %{ 11324 __ divss($dst$$XMMRegister, $constantaddress($con)); 11325 %} 11326 ins_pipe(pipe_slow); 11327 %} 11328 11329 instruct divX_mem(regX dst, memory mem) %{ 11330 predicate(UseSSE>=1); 11331 match(Set dst (DivF dst (LoadF mem))); 11332 format %{ "DIVSS $dst,$mem" %} 11333 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem)); 11334 ins_pipe( pipe_slow ); 11335 %} 11336 11337 // Get the square root of a single precision floating point values in xmm 11338 instruct sqrtX_reg(regX dst, regX src) %{ 11339 predicate(UseSSE>=1); 11340 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 11341 format %{ "SQRTSS $dst,$src" %} 11342 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src)); 11343 ins_pipe( pipe_slow ); 11344 %} 11345 11346 instruct sqrtX_mem(regX dst, memory mem) %{ 11347 predicate(UseSSE>=1); 11348 match(Set dst (ConvD2F 
(SqrtD (ConvF2D (LoadF mem))))); 11349 format %{ "SQRTSS $dst,$mem" %} 11350 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem)); 11351 ins_pipe( pipe_slow ); 11352 %} 11353 11354 // Get the square root of a double precision floating point values in xmm 11355 instruct sqrtXD_reg(regXD dst, regXD src) %{ 11356 predicate(UseSSE>=2); 11357 match(Set dst (SqrtD src)); 11358 format %{ "SQRTSD $dst,$src" %} 11359 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src)); 11360 ins_pipe( pipe_slow ); 11361 %} 11362 11363 instruct sqrtXD_mem(regXD dst, memory mem) %{ 11364 predicate(UseSSE>=2); 11365 match(Set dst (SqrtD (LoadD mem))); 11366 format %{ "SQRTSD $dst,$mem" %} 11367 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem)); 11368 ins_pipe( pipe_slow ); 11369 %} 11370 11371 instruct absF_reg(regFPR1 dst, regFPR1 src) %{ 11372 predicate(UseSSE==0); 11373 match(Set dst (AbsF src)); 11374 ins_cost(100); 11375 format %{ "FABS" %} 11376 opcode(0xE1, 0xD9); 11377 ins_encode( OpcS, OpcP ); 11378 ins_pipe( fpu_reg_reg ); 11379 %} 11380 11381 instruct absX_reg(regX dst ) %{ 11382 predicate(UseSSE>=1); 11383 match(Set dst (AbsF dst)); 11384 format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %} 11385 ins_encode( AbsXF_encoding(dst)); 11386 ins_pipe( pipe_slow ); 11387 %} 11388 11389 instruct negF_reg(regFPR1 dst, regFPR1 src) %{ 11390 predicate(UseSSE==0); 11391 match(Set dst (NegF src)); 11392 ins_cost(100); 11393 format %{ "FCHS" %} 11394 opcode(0xE0, 0xD9); 11395 ins_encode( OpcS, OpcP ); 11396 ins_pipe( fpu_reg_reg ); 11397 %} 11398 11399 instruct negX_reg( regX dst ) %{ 11400 predicate(UseSSE>=1); 11401 match(Set dst (NegF dst)); 11402 format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %} 11403 ins_encode( NegXF_encoding(dst)); 11404 ins_pipe( pipe_slow ); 11405 %} 11406 11407 // Cisc-alternate to addF_reg 11408 // Spill to obtain 24-bit precision 11409 instruct addF24_reg_mem(stackSlotF dst, regF 
src1, memory src2) %{ 11410 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 11411 match(Set dst (AddF src1 (LoadF src2))); 11412 11413 format %{ "FLD $src2\n\t" 11414 "FADD ST,$src1\n\t" 11415 "FSTP_S $dst" %} 11416 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 11417 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 11418 OpcReg_F(src1), 11419 Pop_Mem_F(dst) ); 11420 ins_pipe( fpu_mem_reg_mem ); 11421 %} 11422 // 11423 // Cisc-alternate to addF_reg 11424 // This instruction does not round to 24-bits 11425 instruct addF_reg_mem(regF dst, memory src) %{ 11426 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11427 match(Set dst (AddF dst (LoadF src))); 11428 11429 format %{ "FADD $dst,$src" %} 11430 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 11431 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 11432 OpcP, RegOpc(dst) ); 11433 ins_pipe( fpu_reg_mem ); 11434 %} 11435 11436 // // Following two instructions for _222_mpegaudio 11437 // Spill to obtain 24-bit precision 11438 instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{ 11439 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 11440 match(Set dst (AddF src1 src2)); 11441 11442 format %{ "FADD $dst,$src1,$src2" %} 11443 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 11444 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 11445 OpcReg_F(src2), 11446 Pop_Mem_F(dst) ); 11447 ins_pipe( fpu_mem_reg_mem ); 11448 %} 11449 11450 // Cisc-spill variant 11451 // Spill to obtain 24-bit precision 11452 instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 11453 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 11454 match(Set dst (AddF src1 (LoadF src2))); 11455 11456 format %{ "FADD $dst,$src1,$src2 cisc" %} 11457 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 11458 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 11459 set_instruction_start, 11460 OpcP, 
RMopc_Mem(secondary,src1), 11461 Pop_Mem_F(dst) ); 11462 ins_pipe( fpu_mem_mem_mem ); 11463 %} 11464 11465 // Spill to obtain 24-bit precision 11466 instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 11467 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 11468 match(Set dst (AddF src1 src2)); 11469 11470 format %{ "FADD $dst,$src1,$src2" %} 11471 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 11472 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 11473 set_instruction_start, 11474 OpcP, RMopc_Mem(secondary,src1), 11475 Pop_Mem_F(dst) ); 11476 ins_pipe( fpu_mem_mem_mem ); 11477 %} 11478 11479 11480 // Spill to obtain 24-bit precision 11481 instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{ 11482 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 11483 match(Set dst (AddF src con)); 11484 format %{ "FLD $src\n\t" 11485 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 11486 "FSTP_S $dst" %} 11487 ins_encode %{ 11488 __ fld_s($src$$reg - 1); // FLD ST(i-1) 11489 __ fadd_s($constantaddress($con)); 11490 __ fstp_s(Address(rsp, $dst$$disp)); 11491 %} 11492 ins_pipe(fpu_mem_reg_con); 11493 %} 11494 // 11495 // This instruction does not round to 24-bits 11496 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 11497 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11498 match(Set dst (AddF src con)); 11499 format %{ "FLD $src\n\t" 11500 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 11501 "FSTP $dst" %} 11502 ins_encode %{ 11503 __ fld_s($src$$reg - 1); // FLD ST(i-1) 11504 __ fadd_s($constantaddress($con)); 11505 __ fstp_d($dst$$reg); 11506 %} 11507 ins_pipe(fpu_reg_reg_con); 11508 %} 11509 11510 // Spill to obtain 24-bit precision 11511 instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{ 11512 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 11513 match(Set dst (MulF src1 src2)); 11514 11515 format %{ 
"FLD $src1\n\t" 11516 "FMUL $src2\n\t" 11517 "FSTP_S $dst" %} 11518 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 11519 ins_encode( Push_Reg_F(src1), 11520 OpcReg_F(src2), 11521 Pop_Mem_F(dst) ); 11522 ins_pipe( fpu_mem_reg_reg ); 11523 %} 11524 // 11525 // This instruction does not round to 24-bits 11526 instruct mulF_reg(regF dst, regF src1, regF src2) %{ 11527 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11528 match(Set dst (MulF src1 src2)); 11529 11530 format %{ "FLD $src1\n\t" 11531 "FMUL $src2\n\t" 11532 "FSTP_S $dst" %} 11533 opcode(0xD8, 0x1); /* D8 C8+i */ 11534 ins_encode( Push_Reg_F(src2), 11535 OpcReg_F(src1), 11536 Pop_Reg_F(dst) ); 11537 ins_pipe( fpu_reg_reg_reg ); 11538 %} 11539 11540 11541 // Spill to obtain 24-bit precision 11542 // Cisc-alternate to reg-reg multiply 11543 instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{ 11544 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 11545 match(Set dst (MulF src1 (LoadF src2))); 11546 11547 format %{ "FLD_S $src2\n\t" 11548 "FMUL $src1\n\t" 11549 "FSTP_S $dst" %} 11550 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 11551 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 11552 OpcReg_F(src1), 11553 Pop_Mem_F(dst) ); 11554 ins_pipe( fpu_mem_reg_mem ); 11555 %} 11556 // 11557 // This instruction does not round to 24-bits 11558 // Cisc-alternate to reg-reg multiply 11559 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 11560 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11561 match(Set dst (MulF src1 (LoadF src2))); 11562 11563 format %{ "FMUL $dst,$src1,$src2" %} 11564 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 11565 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 11566 OpcReg_F(src1), 11567 Pop_Reg_F(dst) ); 11568 ins_pipe( fpu_reg_reg_mem ); 11569 %} 11570 11571 // Spill to obtain 24-bit precision 11572 instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory 
src2) %{ 11573 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 11574 match(Set dst (MulF src1 src2)); 11575 11576 format %{ "FMUL $dst,$src1,$src2" %} 11577 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 11578 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 11579 set_instruction_start, 11580 OpcP, RMopc_Mem(secondary,src1), 11581 Pop_Mem_F(dst) ); 11582 ins_pipe( fpu_mem_mem_mem ); 11583 %} 11584 11585 // Spill to obtain 24-bit precision 11586 instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{ 11587 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 11588 match(Set dst (MulF src con)); 11589 11590 format %{ "FLD $src\n\t" 11591 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 11592 "FSTP_S $dst" %} 11593 ins_encode %{ 11594 __ fld_s($src$$reg - 1); // FLD ST(i-1) 11595 __ fmul_s($constantaddress($con)); 11596 __ fstp_s(Address(rsp, $dst$$disp)); 11597 %} 11598 ins_pipe(fpu_mem_reg_con); 11599 %} 11600 // 11601 // This instruction does not round to 24-bits 11602 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 11603 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11604 match(Set dst (MulF src con)); 11605 11606 format %{ "FLD $src\n\t" 11607 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 11608 "FSTP $dst" %} 11609 ins_encode %{ 11610 __ fld_s($src$$reg - 1); // FLD ST(i-1) 11611 __ fmul_s($constantaddress($con)); 11612 __ fstp_d($dst$$reg); 11613 %} 11614 ins_pipe(fpu_reg_reg_con); 11615 %} 11616 11617 11618 // 11619 // MACRO1 -- subsume unshared load into mulF 11620 // This instruction does not round to 24-bits 11621 instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{ 11622 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11623 match(Set dst (MulF (LoadF mem1) src)); 11624 11625 format %{ "FLD $mem1 ===MACRO1===\n\t" 11626 "FMUL ST,$src\n\t" 11627 "FSTP $dst" %} 11628 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or 
D8 /1 */ /* LoadF D9 /0 */ 11629 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 11630 OpcReg_F(src), 11631 Pop_Reg_F(dst) ); 11632 ins_pipe( fpu_reg_reg_mem ); 11633 %} 11634 // 11635 // MACRO2 -- addF a mulF which subsumed an unshared load 11636 // This instruction does not round to 24-bits 11637 instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{ 11638 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11639 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 11640 ins_cost(95); 11641 11642 format %{ "FLD $mem1 ===MACRO2===\n\t" 11643 "FMUL ST,$src1 subsume mulF left load\n\t" 11644 "FADD ST,$src2\n\t" 11645 "FSTP $dst" %} 11646 opcode(0xD9); /* LoadF D9 /0 */ 11647 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 11648 FMul_ST_reg(src1), 11649 FAdd_ST_reg(src2), 11650 Pop_Reg_F(dst) ); 11651 ins_pipe( fpu_reg_mem_reg_reg ); 11652 %} 11653 11654 // MACRO3 -- addF a mulF 11655 // This instruction does not round to 24-bits. It is a '2-address' 11656 // instruction in that the result goes back to src2. This eliminates 11657 // a move from the macro; possibly the register allocator will have 11658 // to add it back (and maybe not). 
11659 instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{ 11660 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11661 match(Set src2 (AddF (MulF src0 src1) src2)); 11662 11663 format %{ "FLD $src0 ===MACRO3===\n\t" 11664 "FMUL ST,$src1\n\t" 11665 "FADDP $src2,ST" %} 11666 opcode(0xD9); /* LoadF D9 /0 */ 11667 ins_encode( Push_Reg_F(src0), 11668 FMul_ST_reg(src1), 11669 FAddP_reg_ST(src2) ); 11670 ins_pipe( fpu_reg_reg_reg ); 11671 %} 11672 11673 // MACRO4 -- divF subF 11674 // This instruction does not round to 24-bits 11675 instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{ 11676 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11677 match(Set dst (DivF (SubF src2 src1) src3)); 11678 11679 format %{ "FLD $src2 ===MACRO4===\n\t" 11680 "FSUB ST,$src1\n\t" 11681 "FDIV ST,$src3\n\t" 11682 "FSTP $dst" %} 11683 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 11684 ins_encode( Push_Reg_F(src2), 11685 subF_divF_encode(src1,src3), 11686 Pop_Reg_F(dst) ); 11687 ins_pipe( fpu_reg_reg_reg_reg ); 11688 %} 11689 11690 // Spill to obtain 24-bit precision 11691 instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{ 11692 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 11693 match(Set dst (DivF src1 src2)); 11694 11695 format %{ "FDIV $dst,$src1,$src2" %} 11696 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 11697 ins_encode( Push_Reg_F(src1), 11698 OpcReg_F(src2), 11699 Pop_Mem_F(dst) ); 11700 ins_pipe( fpu_mem_reg_reg ); 11701 %} 11702 // 11703 // This instruction does not round to 24-bits 11704 instruct divF_reg(regF dst, regF src) %{ 11705 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11706 match(Set dst (DivF dst src)); 11707 11708 format %{ "FDIV $dst,$src" %} 11709 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 11710 ins_encode( Push_Reg_F(src), 11711 OpcP, RegOpc(dst) ); 11712 ins_pipe( fpu_reg_reg ); 11713 %} 11714 11715 11716 // Spill to obtain 24-bit precision 11717 instruct 
modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ 11718 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11719 match(Set dst (ModF src1 src2)); 11720 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 11721 11722 format %{ "FMOD $dst,$src1,$src2" %} 11723 ins_encode( Push_Reg_Mod_D(src1, src2), 11724 emitModD(), 11725 Push_Result_Mod_D(src2), 11726 Pop_Mem_F(dst)); 11727 ins_pipe( pipe_slow ); 11728 %} 11729 // 11730 // This instruction does not round to 24-bits 11731 instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{ 11732 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11733 match(Set dst (ModF dst src)); 11734 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 11735 11736 format %{ "FMOD $dst,$src" %} 11737 ins_encode(Push_Reg_Mod_D(dst, src), 11738 emitModD(), 11739 Push_Result_Mod_D(src), 11740 Pop_Reg_F(dst)); 11741 ins_pipe( pipe_slow ); 11742 %} 11743 11744 instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{ 11745 predicate(UseSSE>=1); 11746 match(Set dst (ModF src0 src1)); 11747 effect(KILL rax, KILL cr); 11748 format %{ "SUB ESP,4\t # FMOD\n" 11749 "\tMOVSS [ESP+0],$src1\n" 11750 "\tFLD_S [ESP+0]\n" 11751 "\tMOVSS [ESP+0],$src0\n" 11752 "\tFLD_S [ESP+0]\n" 11753 "loop:\tFPREM\n" 11754 "\tFWAIT\n" 11755 "\tFNSTSW AX\n" 11756 "\tSAHF\n" 11757 "\tJP loop\n" 11758 "\tFSTP_S [ESP+0]\n" 11759 "\tMOVSS $dst,[ESP+0]\n" 11760 "\tADD ESP,4\n" 11761 "\tFSTP ST0\t # Restore FPU Stack" 11762 %} 11763 ins_cost(250); 11764 ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU); 11765 ins_pipe( pipe_slow ); 11766 %} 11767 11768 11769 //----------Arithmetic Conversion Instructions--------------------------------- 11770 // The conversions operations are all Alpha sorted. Please keep it that way! 
// Round an FPU-stack float by storing it to a 32-bit stack slot.
instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_F(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round an FPU-stack double by storing it to a 64-bit stack slot.
instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_D(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convD2F_reg(stackSlotF dst, regD src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// (FPU source, XMM destination: bounce through the stack)
instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode( D2X_encoding(dst, src) );
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convXD2X_reg(regX dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  opcode(0xF2, 0x0F, 0x5A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Float -> double on the FPU stack (register to register).
instruct convF2D_reg_reg(regD dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_D(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// Float -> double: widen via a rounding store to a 64-bit slot.
instruct convF2D_reg(stackSlotD dst, regF src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// XMM float -> FPU double: bounce through the stack.
instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
  ins_pipe( pipe_slow );
%}

// XMM float -> XMM double via CVTSS2SD.
instruct convX2XD_reg(regXD dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  opcode(0xF3, 0x0F, 0x5A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// Truncating store under "trunc" rounding mode; 0x80000000 flags the
// overflow/NaN case, which falls back to the d2i_wrapper runtime call.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_D(src), D2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// SSE2 double -> int: CVTTSD2SI truncates directly; the 0x80000000
// sentinel result flags overflow/NaN and diverts to d2i_wrapper.
instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  opcode(0x1); // double-precision conversion
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
  ins_pipe( pipe_slow );
%}

// FPU double -> long: truncating FISTP; the EDX:EAX pair equal to
// 0x80000000:00000000 flags overflow/NaN and calls d2l_wrapper.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_D(src), D2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
// SSE2 double -> long: spill to the stack and use the FPU's FISTP,
// since SSE2 (32-bit) has no double->int64 instruction.
instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( XD2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manipulations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  // D2I_encoding works for F2I
  ins_encode( Push_Reg_F(src), D2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
// SSE float -> int: CVTTSS2SI truncates directly; the 0x80000000
// sentinel result flags overflow/NaN and diverts to d2i_wrapper.
instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  opcode(0x0); // single-precision conversion
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
  ins_pipe( pipe_slow );
%}

// FPU float -> long: truncating FISTP; the EDX:EAX pair equal to
// 0x80000000:00000000 flags overflow/NaN and calls d2l_wrapper.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  // D2L_encoding works for F2L
  ins_encode( Push_Reg_F(src), D2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
// SSE float -> long: spill to the stack and use the FPU's FISTP,
// since SSE (32-bit) has no float->int64 instruction.
instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( X2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Int -> double on the FPU stack (exact; no rounding needed).
instruct convI2D_reg(regD dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
  ins_pipe( fpu_reg_mem );
%}

// Int -> double via CVTSI2SD (GPR source).
instruct convI2XD_reg(regXD dst, eRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Int -> double via CVTSI2SD (memory source, folds the load).
instruct convI2XD_mem(regXD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}

// Int -> double via MOVD + CVTDQ2PD (UseXmmI2D variant).
instruct convXI2XD_reg(regXD dst, eRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Int-in-memory -> double on the FPU stack.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_D(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// (Predicate matches only (AndI x 255), so the value fits exactly.)
instruct conv24I2F_reg(regF dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_F(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_F(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2F_reg(regF dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2F_mem(regF dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2X_reg(regX dst, eRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}

  opcode(0xF3, 0x0F, 0x2A);  /* F3 0F 2A /r */
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Int -> float via MOVD + CVTDQ2PS (UseXmmI2F variant).
instruct convXI2X_reg(regX dst, eRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend convert int to long (copy + arithmetic shift for the hi word).
instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long -> double: push the 64-bit value and FILD it.
instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_D(dst));
  ins_pipe( pipe_slow );
%}

// Long -> double into XMM: FILD on the FPU, store, reload via MOVSD.
instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultXD(dst));
  ins_pipe( pipe_slow );
%}

// Long -> float into XMM: FILD on the FPU, store single, reload via MOVSS.
instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Long -> float on the FPU stack, rounded by the single-precision store.
instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_F(dst));
  ins_pipe( pipe_slow );
%}

// Long -> int: just take the low word.
instruct convL2I_reg( eRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}


// The MoveF2I/MoveI2F/MoveD2L/MoveL2D family below are raw bit moves
// (plain MOV/MOVSS/MOVSD/MOVD) -- no numeric conversion is performed.
instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_F(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode( MovX2I_reg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}


instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);  /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode( MovI2X_reg(dst, src) );
  ins_pipe( pipe_slow );
%}

instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_D(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);

  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
  ins_pipe( pipe_slow );
%}

instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode( MovXD2L_reg(dst, src, tmp) );
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);  /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}


instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode( MovL2XD_reg(dst, src, tmp) );
  ins_pipe( pipe_slow );
%}

// Replicate scalar to packed byte (1 byte) values in xmm
instruct Repl8B_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate8B src));
  format %{ "MOVDQA $dst,$src\n\t"
            "PUNPCKLBW $dst,$dst\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
  ins_encode( pshufd_8x8(dst, src));
  ins_pipe( pipe_slow );
%}

// Replicate scalar to packed byte (1 byte) values in xmm
instruct Repl8B_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate8B src));
  format %{ "MOVD $dst,$src\n\t"
            "PUNPCKLBW $dst,$dst\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
  ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
  ins_pipe( pipe_slow );
%}

// Replicate scalar zero to packed byte (1 byte) values in xmm
instruct Repl8B_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate8B zero));
  format %{ "PXOR $dst,$dst\t! replicate8B" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed short (2 byte) values in xmm
instruct Repl4S_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4S src));
  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
  ins_encode( pshufd_4x16(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed short (2 byte) values in xmm
instruct Repl4S_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4S src));
  format %{ "MOVD $dst,$src\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed short (2 byte) values in xmm
instruct Repl4S_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4S zero));
  format %{ "PXOR $dst,$dst\t! replicate4S" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed char (2 byte) values in xmm
instruct Repl4C_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4C src));
  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
  ins_encode( pshufd_4x16(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed char (2 byte) values in xmm
instruct Repl4C_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4C src));
  format %{ "MOVD $dst,$src\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed char (2 byte) values in xmm
instruct Repl4C_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4C zero));
  format %{ "PXOR $dst,$dst\t! replicate4C" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed integer (4 byte) values in xmm
instruct Repl2I_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2I src));
  format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
  ins_encode( pshufd(dst, src, 0x00));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed integer (4 byte) values in xmm
instruct Repl2I_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2I src));
  format %{ "MOVD $dst,$src\n\t"
            "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
  ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed integer (4 byte) values in xmm
instruct Repl2I_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2I zero));
  format %{ "PXOR $dst,$dst\t! replicate2I" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed single precision floating point values in xmm
instruct Repl2F_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2F src));
  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
  ins_encode( pshufd(dst, src, 0xe0));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed single precision floating point values in xmm
instruct Repl2F_regX(regXD dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2F src));
  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
  ins_encode( pshufd(dst, src, 0xe0));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed single precision floating point values in xmm
instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2F zero));
  format %{ "PXOR $dst,$dst\t! replicate2F" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// =======================================================================
// fast clearing of an array
instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
  format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
            "XOR EAX,EAX\n\t"
            "REP STOS\t# store EAX into [EDI++] while ECX--" %}
  opcode(0,0x4);
  ins_encode( Opcode(0xD1), RegOpc(ECX),
              OpcRegReg(0x33,EAX,EAX),
              Opcode(0xF3), Opcode(0xAB) );
  ins_pipe( pipe_slow );
%}

// Intrinsic: compare two char arrays; fixed registers match the
// MacroAssembler::string_compare calling convention.
instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                        eAXRegI result, regXD tmp1, eFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
                          $cnt$$Register, $result$$Register, $tmp3$$Register,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                            eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// General substring search (count not a compile-time constant).
instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                        eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                      regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
                          $tmp3$$Register, $result$$Register, $tmp4$$Register,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST (shorter than CMP with immediate).
instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (AndI src con) compared to zero folds into a single TEST.
instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
// Unsigned int compare: reg vs reg. Produces eFlagsRegU so only the
// unsigned branch/cmov variants consume the result.
instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Unsigned int compare: reg vs immediate (8- or 32-bit form chosen by Con8or32).
instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
// Unsigned int compare: reg vs memory operand (folds the load into the CMP).
instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare against zero: TEST reg,reg sets ZF/SF without an immediate byte.
instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
// Pointer compare: reg vs reg (pointers compare unsigned).
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Pointer compare: reg vs pointer immediate.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
// Pointer compare: reg vs memory operand (folds the load into the CMP).
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
// The predicate restricts this to loads whose type is NOT an oop pointer.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( !n->in(2)->in(2)->bottom_type()->isa_oop_ptr() );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// Null-check a pointer straight from memory: TEST [mem],0xFFFFFFFF sets
// ZF without needing a register for the loaded value.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7); /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
// Computes: limit = init + stride * ((limit - init + stride - 1) / stride)
// using a 64-bit (EAX:EDX) intermediate so the subtraction cannot overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    // Stride of +/-1 is strength-reduced elsewhere; this path does a real divide.
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 appears to be unused in this encoding — candidate for removal.
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      // Negative stride: bias toward zero, then negate so the divide sees a
      // positive dividend.
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(eRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pc_relative(1);
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  opcode(0xE9);
  ins_encode( OpcP, Lbl( labl ) );
  ins_pipe( pipe_jmp );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Loop-end branch on the CF-only unsigned flags variant (cheaper to produce).
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode(Jcc(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}

// Conditional branch on CF-only unsigned flags (single Jcc suffices).
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode(Jcc(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}

// eq/ne branch after an FP compare: must also test the parity flag (PF set
// on unordered), so this emits a JP plus the Jcc — two 6-byte jumps.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(12);
  opcode(0x0F, 0x80);
  ins_encode %{
    Label* l = $labl$$label;
    assert(l != NULL, "need Label");
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, Assembler::parity);
    int parity_disp = -1;
    // NOTE(review): `ok` is written but never read — candidate for removal.
    bool ok = false;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // The two jumps are 6 bytes apart, so their displacements to the same
      // target differ by 6 as well.
      parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
    } else if ($cop$$cmpcode == Assembler::equal) {
      // JP skips over the 6-byte Jcc to the implicit "done" label.
      parity_disp = 6;
      ok = true;
    } else {
      ShouldNotReachHere();
    }
    emit_d32(cbuf, parity_disp);
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
    int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
    emit_d32(cbuf, disp);
  %}
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
// Slow-path subtype check returning a register result (EDI == 0 on hit).
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Same scan, but the consumer only needs the flags (compare vs NULL),
// so the XOR of EDI is skipped — slightly cheaper than the version above.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  opcode(0xEB);
  ins_encode( OpcP, LblShort( labl ) );
  ins_pipe( pipe_jmp );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Short loop-end branch on the CF-only unsigned flags variant.
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Short conditional branch on CF-only unsigned flags.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Short form of the two-jump eq/ne FP-flags branch (JP + Jcc, 2 bytes each).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  opcode(0x70);
  ins_encode %{
    Label* l = $labl$$label;
    assert(l != NULL, "need Label");
    emit_cc(cbuf, $primary, Assembler::parity);
    int parity_disp = -1;
    if ($cop$$cmpcode == Assembler::notEqual) {
      parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
    } else if ($cop$$cmpcode == Assembler::equal) {
      // JP skips over the 2-byte Jcc to the implicit "done" label.
      parity_disp = 2;
    } else {
      ShouldNotReachHere();
    }
    emit_d8(cbuf, parity_disp);
    emit_cc(cbuf, $primary, $cop$$cmpcode);
    int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
    emit_d8(cbuf, disp);
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
  %}
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in $dst for src1 <,==,> src2 (64-bit signed compare).
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // High halves compare signed; low halves compare unsigned (below).
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// For LT/GE vs zero only the sign of the high word matters.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// CMP low, then SBB high into a temp: flags end up reflecting the full
// 64-bit signed difference.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// CMOVE a long (two CMOVs: low then high half) on a long LT/GE compare result.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above but the source long is loaded from memory.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// CMOVE an int on a long LT/GE compare result.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above but the source int is loaded from memory.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE pointers.
// CMOVE a pointer on a long LT/GE compare result.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// FPU (x87) double CMOVE; only valid when doubles are not in XMM (UseSSE<=1).
// Fix: parenthesize (lt || ge) — '&&' binds tighter than '||', so without the
// parentheses the UseSSE guard applied only to the 'lt' arm and the 'ge' arm
// could match regardless of UseSSE (compare the correctly-bracketed cmovPP/
// cmovII/cmovLL predicates above).
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// XMM double CMOVE; only valid when doubles live in XMM (UseSSE>=2).
// Fix: parenthesize (lt || ge) — see note on cmovDD_reg_LTGE.
instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}

// FPU (x87) float CMOVE; only valid when floats are not in XMM (UseSSE==0).
// Fix: parenthesize (lt || ge) — see note on cmovDD_reg_LTGE.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

// XMM float CMOVE; only valid when floats live in XMM (UseSSE>=1).
// Fix: parenthesize (lt || ge) — see note on cmovDD_reg_LTGE.
instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// EQ/NE vs zero: OR the two halves into a temp; ZF reflects the full 64 bits.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compare low halves; only if they are equal does the high-half compare run.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  // Requires hardware CMOV and an eq/ne test on the long-compare flags.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  // A 64-bit conditional move is done as two 32-bit CMOVs (lo then hi).
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above, but the source long comes from memory (CMOV reg,mem twice).
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
  // Requires hardware CMOV and an eq/ne test on the long-compare flags.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above, but the source int comes from memory.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Compare 2 longs (EQ/NE) and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // FIX: parenthesize the BoolTest disjunction.  '&&' binds tighter than
  // '||', so the unparenthesized form meant (UseSSE<=1 && eq) || ne,
  // letting this x87 rule match a 'ne' test regardless of UseSSE.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
  // FIX: parenthesized -- UseSSE>=2 must guard both the eq and the ne case.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // FIX: parenthesized -- UseSSE==0 must guard both the eq and the ne case.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
  // FIX: parenthesized -- UseSSE>=1 must guard both the eq and the ne case.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Flags were produced by a swapped-operand compare, so the branch uses
  // the commuted condition (cmpOp_commute); only gt/le tests apply here.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  // Requires hardware CMOV and a le/gt test on the long-compare flags.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  // 64-bit conditional move done as two 32-bit CMOVs (lo then hi).
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above, but the source long comes from memory.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  // Note: the "+4" below is display-only text; the encoding is the same
  // RegMem/RegMem_Hi pair used by the EQNE variant.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
  // Requires hardware CMOV and a le/gt test on the long-compare flags.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above, but the source int comes from memory.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // FIX: parenthesize the BoolTest disjunction.  '&&' binds tighter than
  // '||', so the unparenthesized form meant (UseSSE<=1 && le) || gt,
  // letting this x87 rule match a 'gt' test regardless of UseSSE.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
  // FIX: parenthesized -- UseSSE>=2 must guard both the le and the gt case.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // FIX: parenthesized -- UseSSE==0 must guard both the le and the gt case.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
  // FIX: parenthesized -- UseSSE>=1 must guard both the le and the gt case.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_FPU,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
  ins_alignment(4);
%}

// Call Java Static Instruction (method handle version)
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
  match(CallStaticJava);
  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
  effect(USE meth);
  // EBP is saved by all callees (for interpreter stack correction).
  // We use it here for a similar purpose, in {preserve,restore}_SP.

  ins_cost(300);
  format %{ "CALL,static/MethodHandle " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_FPU,
              preserve_SP,
              Java_Static_Call( meth ),
              restore_SP,
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_FPU,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_FPU,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_FPU,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}

// Call runtime without safepoint and without touching the FPU state.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  // Emits nothing: the exception oop was placed in EAX before control
  // reached this handler (see comment above); this only binds the register.
  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP    rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking


instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
  // Sets the condition flags; clobbers tmp (forced to EAX) and scr.
  match( Set cr (FastLock object box) );
  effect( TEMP tmp, TEMP scr );
  ins_cost(300);
  format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
  ins_encode( Fast_Lock(object,box,tmp,scr) );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}

instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  // Sets the condition flags; box is forced to EAX, tmp is clobbered.
  match( Set cr (FastUnlock object box) );
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "FASTUNLOCK $object, $box, $tmp" %}
  ins_encode( Fast_Unlock(object,box,tmp) );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}

//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(eRegI dst, eRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, eRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(eRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// A load (insn 0) of a value just stored (insn 1) to the same address
// (1.mem == 0.mem) into the same register (1.src == 0.dst) is redundant;
// replace the pair with just the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.