//
// Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
// FPR0L/FPR0H carry VMRegImpl::Bad() because the x87 top-of-stack is never
// allocated — it only participates in instruction encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI,   ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.
// How to find the high register of a Long pair, given the low register
// NOTE(review): the "+2" stride presumes the pair's high half sits two
// OptoReg slots above the low half — confirm against the allocation-chunk
// layout above before changing register ordering.
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Rounds 'adr' down to a 16-byte boundary and stores the two 64-bit halves
// {lo, hi} there, returning the aligned address. The caller must supply a
// buffer with at least 16 bytes of slack (see fp_signmask_pool below).
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Size in bytes of the reset instructions (fldcw and/or vzeroupper) that are
// emitted in front of a call; ret_addr_offset() and compute_padding() below
// must account for these extra bytes.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Filled in when the FFree_Float_Stack_All stub is emitted; checked below.
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return SafepointMechanism::uses_thread_local_poll();
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1;                      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5;                      // skip MOV instruction
  current_offset += 1;                      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M-format byte: f1 in bits 7:6 (mode), f2 in bits 5:3 (reg or
// opcode extension), f3 in bits 2:0 (r/m). The same packing is reused for
// SIB bytes (scale/index/base) by callers below.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// OR a condition-code field into an opcode base byte and emit it.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  // Debug-only sanity check: an embedded oop immediate must really be an oop
  // and must not be scavengable unless that is explicitly allowed.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop(d32))), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits 'opcode' followed by an [ESP+disp] operand, choosing the shorter
// 8-bit displacement form when disp fits in a signed byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );                   // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );    // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d8 (cbuf, disp);     // Displacement    // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );    // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, disp);     // Displacement    // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModR/M (and, if needed, SIB) byte plus displacement for a
// register/memory operand. Mode bits: 0x0 = no displacement, 0x1 = disp8,
// 0x2 = disp32; index == 0x4 means "no index register".
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) { // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}


// Register-to-register integer move (MOV r32, r/m32, opcode 0x8B);
// a self-move is elided entirely (empty encoding).
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Post-compare flag fixup so that unordered (NaN) operands report
// 'less than' (CF set, ZF/PF cleared) to subsequent branches.
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  // 7 6 5 4 3 2 1 0
  // |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  // 0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst':
// -1 if unordered (parity) or below, otherwise 0/1 from the not-equal test.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Debug listing of the method prologue. The framesize bookkeeping below
// mirrors what MachPrologNode::emit produces via verified_entry; keep the
// two in sync when changing either.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  // verified_entry emits the stack bang (if requested), frame push and
  // 24-bit FPU control-word load in one go.
  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Debug listing of the method epilogue; mirrors MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  // ADD ESP, #framesize — use the short imm8 form when the size fits.
  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    if (SafepointMechanism::uses_thread_local_poll()) {
      // Thread-local polling: load the poll page address out of the current
      // thread and test it (EBX is clobbered as scratch here).
      Register pollReg = as_Register(EBX_enc);
      MacroAssembler masm(&cbuf);
      masm.get_thread(pollReg);
      masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
      masm.relocate(relocInfo::poll_return_type);
      masm.testl(rax, Address(pollReg, 0));
    } else {
      // Global polling page at an absolute address.
      cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
      emit_opcode(cbuf,0x85);
      emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
      emit_d32(cbuf, (intptr_t)os::get_polling_page());
    }
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Register class of an OptoReg, used to pick the right spill-copy flavor.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (cbuf != NULL), print (!do_size) or size a load/store between a
// register and an [ESP + offset] stack slot; returns the accumulated
// instruction size in bytes.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else {                   // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // opcode + ModRM + SIB = 3 bytes, plus 0/1/4 displacement bytes.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
// Spill an XMM register to/from a stack slot ([ESP+offset]).  Register
// adjacency (reg_lo+1 == reg_hi) selects a 64-bit (double) vs 32-bit (float)
// move.  Returns the accumulated emitted size in bytes.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  // Operand width feeds the EVEX compressed-displacement (disp8*N) query below.
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting: under EVEX (UseAVX > 2) a displacement larger than 127
  // may still compress into one byte, so ask the assembler rather than
  // comparing against 127 directly.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy; adjacency of the lo/hi halves selects a 64-bit
// (double) vs 32-bit (float) move.  Returns accumulated size in bytes.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// GPR-to-XMM move (MOVD), 32-bit payload; src_hi/dst_hi are unused here
// (asserted rc_bad by the caller).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// XMM-to-GPR move (MOVD), 32-bit payload; mirror of impl_movgpr2x_helper.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register copy: MOV r32,r/m32 (0x8B /r), always 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 register to [ESP+offset].  If the value is not already in
// ST(0) it is pushed first (FLD) and stored with a popping FSTP; otherwise a
// non-popping FST is used so the FPU stack height is preserved.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // st_op rides the GPR encodings of EBX (3) and EDX (2) purely to supply the
  // mod/rm reg field: /3 selects FSTP (store & pop), /2 selects FST (store,
  // no pop) for both the 0xD9 (32-bit) and 0xDD (64-bit) opcodes.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
956 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 957 int src_hi, int dst_hi, uint ireg, outputStream* st); 958 959 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 960 int stack_offset, int reg, uint ireg, outputStream* st); 961 962 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 963 int dst_offset, uint ireg, outputStream* st) { 964 int calc_size = 0; 965 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 966 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 967 switch (ireg) { 968 case Op_VecS: 969 calc_size = 3+src_offset_size + 3+dst_offset_size; 970 break; 971 case Op_VecD: { 972 calc_size = 3+src_offset_size + 3+dst_offset_size; 973 int tmp_src_offset = src_offset + 4; 974 int tmp_dst_offset = dst_offset + 4; 975 src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4); 976 dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 
1 : 4); 977 calc_size += 3+src_offset_size + 3+dst_offset_size; 978 break; 979 } 980 case Op_VecX: 981 case Op_VecY: 982 case Op_VecZ: 983 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 984 break; 985 default: 986 ShouldNotReachHere(); 987 } 988 if (cbuf) { 989 MacroAssembler _masm(cbuf); 990 int offset = __ offset(); 991 switch (ireg) { 992 case Op_VecS: 993 __ pushl(Address(rsp, src_offset)); 994 __ popl (Address(rsp, dst_offset)); 995 break; 996 case Op_VecD: 997 __ pushl(Address(rsp, src_offset)); 998 __ popl (Address(rsp, dst_offset)); 999 __ pushl(Address(rsp, src_offset+4)); 1000 __ popl (Address(rsp, dst_offset+4)); 1001 break; 1002 case Op_VecX: 1003 __ movdqu(Address(rsp, -16), xmm0); 1004 __ movdqu(xmm0, Address(rsp, src_offset)); 1005 __ movdqu(Address(rsp, dst_offset), xmm0); 1006 __ movdqu(xmm0, Address(rsp, -16)); 1007 break; 1008 case Op_VecY: 1009 __ vmovdqu(Address(rsp, -32), xmm0); 1010 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1011 __ vmovdqu(Address(rsp, dst_offset), xmm0); 1012 __ vmovdqu(xmm0, Address(rsp, -32)); 1013 break; 1014 case Op_VecZ: 1015 __ evmovdquq(Address(rsp, -64), xmm0, 2); 1016 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 1017 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 1018 __ evmovdquq(xmm0, Address(rsp, -64), 2); 1019 break; 1020 default: 1021 ShouldNotReachHere(); 1022 } 1023 int size = __ offset() - offset; 1024 assert(size == calc_size, "incorrect size calculation"); 1025 return size; 1026 #ifndef PRODUCT 1027 } else if (!do_size) { 1028 switch (ireg) { 1029 case Op_VecS: 1030 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1031 "popl [rsp + #%d]", 1032 src_offset, dst_offset); 1033 break; 1034 case Op_VecD: 1035 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1036 "popq [rsp + #%d]\n\t" 1037 "pushl [rsp + #%d]\n\t" 1038 "popq [rsp + #%d]", 1039 src_offset, dst_offset, src_offset+4, dst_offset+4); 1040 break; 1041 case Op_VecX: 1042 st->print("movdqu [rsp - #16], xmm0\t# 
128-bit mem-mem spill\n\t" 1043 "movdqu xmm0, [rsp + #%d]\n\t" 1044 "movdqu [rsp + #%d], xmm0\n\t" 1045 "movdqu xmm0, [rsp - #16]", 1046 src_offset, dst_offset); 1047 break; 1048 case Op_VecY: 1049 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1050 "vmovdqu xmm0, [rsp + #%d]\n\t" 1051 "vmovdqu [rsp + #%d], xmm0\n\t" 1052 "vmovdqu xmm0, [rsp - #32]", 1053 src_offset, dst_offset); 1054 break; 1055 case Op_VecZ: 1056 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1057 "vmovdqu xmm0, [rsp + #%d]\n\t" 1058 "vmovdqu [rsp + #%d], xmm0\n\t" 1059 "vmovdqu xmm0, [rsp - #64]", 1060 src_offset, dst_offset); 1061 break; 1062 default: 1063 ShouldNotReachHere(); 1064 } 1065 #endif 1066 } 1067 return calc_size; 1068 } 1069 1070 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1071 // Get registers to move 1072 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1073 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1074 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1075 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1076 1077 enum RC src_second_rc = rc_class(src_second); 1078 enum RC src_first_rc = rc_class(src_first); 1079 enum RC dst_second_rc = rc_class(dst_second); 1080 enum RC dst_first_rc = rc_class(dst_first); 1081 1082 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1083 1084 // Generate spill code! 
1085 int size = 0; 1086 1087 if( src_first == dst_first && src_second == dst_second ) 1088 return size; // Self copy, no move 1089 1090 if (bottom_type()->isa_vect() != NULL) { 1091 uint ireg = ideal_reg(); 1092 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1093 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1094 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1095 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1096 // mem -> mem 1097 int src_offset = ra_->reg2offset(src_first); 1098 int dst_offset = ra_->reg2offset(dst_first); 1099 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1100 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1101 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1102 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1103 int stack_offset = ra_->reg2offset(dst_first); 1104 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1105 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1106 int stack_offset = ra_->reg2offset(src_first); 1107 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1108 } else { 1109 ShouldNotReachHere(); 1110 } 1111 } 1112 1113 // -------------------------------------- 1114 // Check for mem-mem move. push/pop to move. 
1115 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1116 if( src_second == dst_first ) { // overlapping stack copy ranges 1117 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1118 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1119 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1120 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1121 } 1122 // move low bits 1123 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1124 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1125 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1126 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1127 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1128 } 1129 return size; 1130 } 1131 1132 // -------------------------------------- 1133 // Check for integer reg-reg copy 1134 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1135 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1136 1137 // Check for integer store 1138 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1139 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1140 1141 // Check for integer load 1142 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1143 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1144 1145 // Check for integer reg-xmm reg copy 1146 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1147 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1148 "no 64 bit integer-float reg moves" ); 1149 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1150 } 1151 // -------------------------------------- 1152 // Check for float reg-reg copy 1153 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1154 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1155 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1156 if( cbuf ) { 1157 1158 // Note the mucking with the register encode to compensate for the 0/1 1159 // indexing issue mentioned in a comment in the reg_def sections 1160 // for FPR registers many lines above here. 1161 1162 if( src_first != FPR1L_num ) { 1163 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1164 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1165 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1166 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1167 } else { 1168 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1169 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1170 } 1171 #ifndef PRODUCT 1172 } else if( !do_size ) { 1173 if( size != 0 ) st->print("\n\t"); 1174 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1175 else st->print( "FST %s", Matcher::regName[dst_first]); 1176 #endif 1177 } 1178 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1179 } 1180 1181 // Check for float store 1182 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1183 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1184 } 1185 1186 // Check for float load 1187 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1188 int offset = ra_->reg2offset(src_first); 1189 const char *op_str; 1190 int op; 1191 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1192 op_str = "FLD_D"; 1193 op = 0xDD; 1194 } else { // 32-bit load 1195 op_str = "FLD_S"; 1196 op = 0xD9; 1197 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1198 } 1199 if( cbuf ) { 1200 emit_opcode (*cbuf, op ); 1201 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1202 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1203 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1204 #ifndef PRODUCT 1205 } else if( !do_size ) { 1206 if( size != 0 ) st->print("\n\t"); 1207 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1208 #endif 1209 } 1210 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1211 return size + 3+offset_size+2; 1212 } 1213 1214 // Check for xmm reg-reg copy 1215 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1216 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1217 (src_first+1 == src_second && dst_first+1 == dst_second), 1218 "no non-adjacent float-moves" ); 1219 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1220 } 1221 1222 // Check for xmm reg-integer reg copy 1223 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1224 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1225 "no 64 bit float-integer reg moves" ); 1226 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1227 } 1228 1229 // Check for xmm store 1230 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1231 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1232 } 1233 1234 // Check for float xmm load 1235 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1236 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1237 } 1238 1239 // Copy from float reg to xmm reg 1240 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1241 
// copy to the top of stack from floating point reg 1242 // and use LEA to preserve flags 1243 if( cbuf ) { 1244 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1245 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1246 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1247 emit_d8(*cbuf,0xF8); 1248 #ifndef PRODUCT 1249 } else if( !do_size ) { 1250 if( size != 0 ) st->print("\n\t"); 1251 st->print("LEA ESP,[ESP-8]"); 1252 #endif 1253 } 1254 size += 4; 1255 1256 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1257 1258 // Copy from the temp memory to the xmm reg. 1259 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1260 1261 if( cbuf ) { 1262 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1263 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1264 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1265 emit_d8(*cbuf,0x08); 1266 #ifndef PRODUCT 1267 } else if( !do_size ) { 1268 if( size != 0 ) st->print("\n\t"); 1269 st->print("LEA ESP,[ESP+8]"); 1270 #endif 1271 } 1272 size += 4; 1273 return size; 1274 } 1275 1276 assert( size > 0, "missed a case" ); 1277 1278 // -------------------------------------------------------------------- 1279 // Check for second bits still needing moving. 
1280 if( src_second == dst_second ) 1281 return size; // Self copy; no move 1282 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1283 1284 // Check for second word int-int move 1285 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1286 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1287 1288 // Check for second word integer store 1289 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1290 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1291 1292 // Check for second word integer load 1293 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1294 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1295 1296 1297 Unimplemented(); 1298 return 0; // Mute compiler 1299 } 1300 1301 #ifndef PRODUCT 1302 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1303 implementation( NULL, ra_, false, st ); 1304 } 1305 #endif 1306 1307 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1308 implementation( &cbuf, ra_, false, NULL ); 1309 } 1310 1311 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1312 return implementation( NULL, ra_, true, NULL ); 1313 } 1314 1315 1316 //============================================================================= 1317 #ifndef PRODUCT 1318 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1319 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1320 int reg = ra_->get_reg_first(this); 1321 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1322 } 1323 #endif 1324 1325 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1326 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1327 int reg = ra_->get_encode(this); 1328 if( offset >= 128 ) { 1329 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1330 emit_rm(cbuf, 0x2, reg, 
0x04); 1331 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1332 emit_d32(cbuf, offset); 1333 } 1334 else { 1335 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1336 emit_rm(cbuf, 0x1, reg, 0x04); 1337 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1338 emit_d8(cbuf, offset); 1339 } 1340 } 1341 1342 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1343 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1344 if( offset >= 128 ) { 1345 return 7; 1346 } 1347 else { 1348 return 4; 1349 } 1350 } 1351 1352 //============================================================================= 1353 #ifndef PRODUCT 1354 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1355 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1356 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1357 st->print_cr("\tNOP"); 1358 st->print_cr("\tNOP"); 1359 if( !OptoBreakpoint ) 1360 st->print_cr("\tNOP"); 1361 } 1362 #endif 1363 1364 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1365 MacroAssembler masm(&cbuf); 1366 #ifdef ASSERT 1367 uint insts_size = cbuf.insts_size(); 1368 #endif 1369 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1370 masm.jump_cc(Assembler::notEqual, 1371 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1372 /* WARNING these NOPs are critical so that verified entry point is properly 1373 aligned for patching by NativeJump::patch_verified_entry() */ 1374 int nops_cnt = 2; 1375 if( !OptoBreakpoint ) // Leave space for int3 1376 nops_cnt += 1; 1377 masm.nop(nops_cnt); 1378 1379 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1380 } 1381 1382 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1383 return OptoBreakpoint ? 
11 : 12; 1384 } 1385 1386 1387 //============================================================================= 1388 1389 int Matcher::regnum_to_fpu_offset(int regnum) { 1390 return regnum - 32; // The FP registers are in the second chunk 1391 } 1392 1393 // This is UltraSparc specific, true just means we have fast l2f conversion 1394 const bool Matcher::convL2FSupported(void) { 1395 return true; 1396 } 1397 1398 // Is this branch offset short enough that a short branch can be used? 1399 // 1400 // NOTE: If the platform does not provide any short branch variants, then 1401 // this method should return false for offset 0. 1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1403 // The passed offset is relative to address of the branch. 1404 // On 86 a branch displacement is calculated relative to address 1405 // of a next instruction. 1406 offset -= br_size; 1407 1408 // the short version of jmpConUCF2 contains multiple branches, 1409 // making the reach slightly less 1410 if (rule == jmpConUCF2_rule) 1411 return (-126 <= offset && offset <= 125); 1412 return (-128 <= offset && offset <= 127); 1413 } 1414 1415 const bool Matcher::isSimpleConstant64(jlong value) { 1416 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1417 return false; 1418 } 1419 1420 // The ecx parameter to rep stos for the ClearArray node is in dwords. 1421 const bool Matcher::init_array_count_is_in_bytes = false; 1422 1423 // Needs 2 CMOV's for longs. 1424 const int Matcher::long_cmove_cost() { return 1; } 1425 1426 // No CMOVF/CMOVD with SSE/SSE2 1427 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1428 1429 // Does the CPU require late expand (see block.cpp for description of late expand)? 1430 const bool Matcher::require_postalloc_expand = false; 1431 1432 // Do we need to mask the count passed to shift instructions or does 1433 // the cpu only look at the lower 5/6 bits anyway? 
const bool Matcher::need_masked_shift_count = false;

// The four queries below concern compressed oops/klass pointers and are not
// applicable on this port; ShouldNotCallThis() traps any unexpected call.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_oop_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_klass_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Locate the memory operand feeding input 'idx' of 'node' and replace it
// with its *_win95_safe variant operand.  (NOTE(review): per the operand
// names this presumably works around a Win95-era implicit-null-check
// address-form restriction — confirm against the *_win95_safe operand
// definitions elsewhere in this file.)
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Walk operands until the one whose leaf range covers input 'idx'.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                      // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  // Install the safe replacement operand in place of the original.
  node->_opnds[opcnt] = new_memory;
}

// Advertise
// here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  // XMM argument registers are only live when the matching SSE level is on.
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
// (no divmodL on this port; must not be reached)
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
// (no divmodL on this port; must not be reached)
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// True for (AndL x con) with a constant whose high word is zero, and for a
// ConL constant whose high word is zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
MEMORY_INTER causes an 1608 // operand to generate four functions which return the Base Register, the 1609 // Index Register, the Scale Value, and the Offset Value of the operand when 1610 // queried. COND_INTER causes an operand to generate six functions which 1611 // return the encoding code (ie - encoding bits for the instruction) 1612 // associated with each basic boolean condition for a conditional instruction. 1613 // Instructions specify two basic values for encoding. They use the 1614 // ins_encode keyword to specify their encoding class (which must be one of 1615 // the class names specified in the encoding block), and they use the 1616 // opcode keyword to specify, in order, their primary, secondary, and 1617 // tertiary opcode. Only the opcode sections which a particular instruction 1618 // needs for encoding need to be specified. 1619 encode %{ 1620 // Build emit functions for each basic byte or larger field in the intel 1621 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1622 // code in the enc_class source block. Emit functions will live in the 1623 // main source block for now. 
// In future, we can generalize this by
// adding a syntax that specifies the sizes of fields in an order,
// so that the adlc can build the emit functions automagically

// Emit primary opcode ($primary is the first byte declared by the
// instruct's "opcode" keyword).
enc_class OpcP %{
  emit_opcode(cbuf, $primary);
%}

// Emit secondary opcode
enc_class OpcS %{
  emit_opcode(cbuf, $secondary);
%}

// Emit opcode directly from an immediate operand
enc_class Opcode(immI d8) %{
  emit_opcode(cbuf, $d8$$constant);
%}

// Emit the 0x66 operand-size prefix (switch to 16-bit operand size)
enc_class SizePrefix %{
  emit_opcode(cbuf,0x66);
%}

// Emit a mod=11 (register-register) ModRM byte for dst,src
enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// Emit an opcode byte from an immediate, then a reg-reg ModRM byte
enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
  emit_opcode(cbuf,$opcode$$constant);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// MOV r32, 0 — the register number is folded into the opcode byte
enc_class mov_r32_imm0( rRegI dst ) %{
  emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
  emit_d32   ( cbuf, 0x0  );             // imm32==0x0
%}

enc_class cdq_enc %{
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  //
  //         normal case                           special case
  //
  // input : rax,: dividend                         min_int
  //         reg: divisor                          -1
  //
  // output: rax,: quotient  (= rax, idiv reg)       min_int
  //         rdx: remainder (= rax, irem reg)       0
  //
  //  Code sequence:
  //
  //  81 F8 00 00 00 80    cmp         rax,80000000h
  //  0F 85 0B 00 00 00    jne         normal_case
  //  33 D2                xor         edx,edx
  //  83 F9 FF             cmp         ecx,0xFF      ; imm8 sign-extends to -1
  //  0F 84 03 00 00 00    je          done
  //                  normal_case:
  //  99                   cdq
  //  F7 F9                idiv        rax,ecx
  //                  done:
  //
  emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
  emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
  emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
  emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
  emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
  emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
  emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
  emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,-1 (imm8 0xFF sign-extended)
  emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
  emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
  emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
  // normal_case:
  emit_opcode(cbuf,0x99);                                         // cdq
  // idiv (note: must be emitted by the user of this rule)
  // done:
%}

// Dense encoding for older common ops
enc_class Opc_plus(immI opcode, rRegI reg) %{
  emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
%}


// Opcode enc_class for 8/32 bit immediate instructions with sign-extension
enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
  // Check for 8-bit immediate, and set sign extend bit in opcode
  if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
    emit_opcode(cbuf, $primary | 0x02);
  }
  else {                          // If 32-bit immediate
    emit_opcode(cbuf, $primary);
  }
%}

enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
  // Emit primary opcode and set sign-extend bit
  // Check for 8-bit immediate, and set sign extend bit in opcode
  if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
    emit_opcode(cbuf, $primary | 0x02); }
  else {                          // If 32-bit immediate
    emit_opcode(cbuf, $primary);
  }
  // Emit r/m byte with secondary opcode, after primary opcode.
  emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
%}

// Emit only the immediate: 8 bits if it fits, 32 bits otherwise.
// Pairs with OpcSE/OpcSErm, which select the matching opcode form.
enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
  // Check for 8-bit immediate, and set sign extend bit in opcode
  if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
    $$$emit8$imm$$constant;
  }
  else {                          // If 32-bit immediate
    // Output immediate
    $$$emit32$imm$$constant;
  }
%}

// Low word of a long immediate op: opcode + ModRM + 8- or 32-bit immediate
enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
  // Emit primary opcode and set sign-extend bit
  // Check for 8-bit immediate, and set sign extend bit in opcode
  int con = (int)$imm$$constant; // Throw away top bits
  emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
  // Emit r/m byte with secondary opcode, after primary opcode.
  emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
  else                               emit_d32(cbuf,con);
%}

// High word of a long immediate op; uses $tertiary and the paired high register
enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
  // Emit primary opcode and set sign-extend bit
  // Check for 8-bit immediate, and set sign extend bit in opcode
  int con = (int)($imm$$constant >> 32); // Throw away bottom bits
  emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
  // Emit r/m byte with tertiary opcode, after primary opcode.
  emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
  if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
  else                               emit_d32(cbuf,con);
%}

// Register number folded into the opcode byte (e.g. BSWAP r32)
enc_class OpcSReg (rRegI dst) %{    // BSWAP
  emit_cc(cbuf, $secondary, $dst$$reg );
%}

// Byte-swap a 64-bit value: bswap each half, then exchange the halves
enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
  int destlo = $dst$$reg;
  int desthi = HIGH_FROM_LOW(destlo);
  // bswap lo
  emit_opcode(cbuf, 0x0F);
  emit_cc(cbuf, 0xC8, destlo);
  // bswap hi
  emit_opcode(cbuf, 0x0F);
  emit_cc(cbuf, 0xC8, desthi);
  // xchg lo and hi
  emit_opcode(cbuf, 0x87);
  emit_rm(cbuf, 0x3, destlo, desthi);
%}

// ModRM with $secondary as the /digit opcode extension
enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
  emit_rm(cbuf, 0x3, $secondary, $div$$reg );
%}

enc_class enc_cmov(cmpOp cop ) %{ // CMOV
  $$$emit8$primary;
  emit_cc(cbuf, $secondary, $cop$$cmpcode);
%}

// FCMOV: condition and stack register folded into the two-byte opcode
enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
  int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
  emit_d8(cbuf, op >> 8 );
  emit_d8(cbuf, op & 255);
%}

// emulate a CMOV with a conditional branch around a MOV
enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
  // Invert sense of branch from sense of CMOV
  emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
  emit_d8( cbuf, $brOffs$$constant );
%}

// Slow-path subtype check; $primary selects whether to zero the result
// register on success (see the instructs that use this class).
enc_class enc_PartialSubtypeCheck( ) %{
  Register Redi = as_Register(EDI_enc); // result register
  Register Reax = as_Register(EAX_enc); // super class
  Register Recx = as_Register(ECX_enc); // killed
  Register Resi = as_Register(ESI_enc); // sub class
  Label miss;

  MacroAssembler _masm(&cbuf);
  __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                   NULL, &miss,
                                   /*set_cond_codes:*/ true);
  if ($primary) {
    __ xorptr(Redi, Redi);
  }
  __ bind(miss);
%}

enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
  MacroAssembler masm(&cbuf);
  int start = masm.offset();
  if (UseSSE >= 2) {
    if (VerifyFPU) {
      masm.verify_FPU(0, "must be empty in SSE2+ mode");
    }
  } else {
    // External c_calling_convention expects the FPU stack to be 'clean'.
    // Compiled code leaves it dirty.  Do cleanup now.
    masm.empty_FPU_stack();
  }
  // Record (first use) or check (later uses) the emitted size so every
  // expansion of this encoding has an identical, known length.
  if (sizeof_FFree_Float_Stack_All == -1) {
    sizeof_FFree_Float_Stack_All = masm.offset() - start;
  } else {
    assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
  }
%}

enc_class Verify_FPU_For_Leaf %{
  if( VerifyFPU ) {
    MacroAssembler masm(&cbuf);
    masm.verify_FPU( -3, "Returning from Runtime Leaf call");
  }
%}

enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
  // This is the instruction starting address for relocation info.
  cbuf.set_insts_mark();
  $$$emit8$primary;
  // CALL directly to the runtime
  emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                 runtime_call_Relocation::spec(), RELOC_IMM32 );

  if (UseSSE >= 2) {
    // The C runtime returns floats on the x87 stack; move the result
    // to xmm0 (or drop it) for SSE2+ compiled code.
    MacroAssembler _masm(&cbuf);
    BasicType rt = tf()->return_type();

    if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
      // A C runtime call where the return value is unused.  In SSE2+
      // mode the result needs to be removed from the FPU stack.  It's
      // likely that this function call could be removed by the
      // optimizer if the C function is a pure function.
      __ ffree(0);
    } else if (rt == T_FLOAT) {
      __ lea(rsp, Address(rsp, -4));
      __ fstp_s(Address(rsp, 0));
      __ movflt(xmm0, Address(rsp, 0));
      __ lea(rsp, Address(rsp, 4));
    } else if (rt == T_DOUBLE) {
      __ lea(rsp, Address(rsp, -8));
      __ fstp_d(Address(rsp, 0));
      __ movdbl(xmm0, Address(rsp, 0));
      __ lea(rsp, Address(rsp, 8));
    }
  }
%}

enc_class pre_call_resets %{
  // If method sets FPU control word restore it here
  debug_only(int off0 = cbuf.insts_size());
  if (ra_->C->in_24_bit_fp_mode()) {
    MacroAssembler _masm(&cbuf);
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }
  // Clear upper bits of YMM registers when current compiled code uses
  // wide vectors to avoid AVX <-> SSE transition penalty during call.
  MacroAssembler _masm(&cbuf);
  __ vzeroupper();
  // Emitted size must match the size the scheduler was told to expect.
  debug_only(int off1 = cbuf.insts_size());
  assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
%}

enc_class post_call_FPU %{
  // If method sets FPU control word do it here also
  if (Compile::current()->in_24_bit_fp_mode()) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
  }
%}

enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
  // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
  // who we intended to call.
  cbuf.set_insts_mark();
  $$$emit8$primary;

  if (!_method) {
    // Not a Java method: plain runtime call.
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                   runtime_call_Relocation::spec(),
                   RELOC_IMM32);
  } else {
    int method_index = resolved_method_index(cbuf);
    RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                : static_call_Relocation::spec(method_index);
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                   rspec, RELOC_DISP32);
    // Emit stubs for static call.
    address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
    if (stub == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  }
%}

enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
  MacroAssembler _masm(&cbuf);
  __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
%}

enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
  int disp = in_bytes(Method::from_compiled_offset());
  assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

  // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
  cbuf.set_insts_mark();
  $$$emit8$primary;
  emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
  emit_d8(cbuf, disp);             // Displacement

%}

// Following encoding is no longer used, but may be restored if calling
// convention changes significantly.
//   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
//
// enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
//   // int ic_reg     = Matcher::inline_cache_reg();
//   // int ic_encode  = Matcher::_regEncode[ic_reg];
//   // int imo_reg    = Matcher::interpreter_method_oop_reg();
//   // int imo_encode = Matcher::_regEncode[imo_reg];
//
//   // // Interpreter expects method_oop in EBX, currently a callee-saved register,
//   // // so we load it immediately before the call
//   // emit_opcode(cbuf, 0x8B);  // MOV    imo_reg,ic_reg  # method_oop
//   // emit_rm(cbuf, 0x03, imo_encode, ic_encode );  // R/M byte
//
//   // xor rbp,ebp
//   emit_opcode(cbuf, 0x33);
//   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
//
//   // CALL to interpreter.
//   cbuf.set_insts_mark();
//   $$$emit8$primary;
//   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
//                  runtime_call_Relocation::spec(), RELOC_IMM32 );
// %}

// Opcode + /digit ModRM + 8-bit shift count (SHL/SAR/SHR by immediate)
enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  $$$emit8$shift$$constant;
%}

enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  emit_opcode(cbuf, 0xB8 + $dst$$reg);
  $$$emit32$src$$constant;
%}

enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  emit_opcode(cbuf, $primary + $dst$$reg);
  $$$emit32$src$$constant;
%}

// Load the low 32 bits of a long immediate; a zero low word becomes
// XOR dst,dst instead of MOV dst,0.
enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  int dst_enc = $dst$$reg;
  int src_con = $src$$constant & 0x0FFFFFFFFL;
  if (src_con == 0) {
    // xor dst, dst
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, dst_enc, dst_enc);
  } else {
    emit_opcode(cbuf, $primary + dst_enc);
    emit_d32(cbuf, src_con);
  }
%}

// Load the high 32 bits of a long immediate into the paired high register
// (dst encoding + 2); a zero high word becomes XOR dst,dst.
enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  int dst_enc = $dst$$reg + 2;
  int src_con = ((julong)($src$$constant)) >> 32;
  if (src_con == 0) {
    // xor dst, dst
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, dst_enc, dst_enc);
  } else {
    emit_opcode(cbuf, $primary + dst_enc);
    emit_d32(cbuf, src_con);
  }
%}


// Encode a reg-reg copy.  If it is useless, then empty encoding.
// (encode_Copy is a helper defined elsewhere in this file.)
enc_class enc_Copy( rRegI dst, rRegI src ) %{
  encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}

// Copy the low word of a long into an int register.
enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
  encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}

enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// Low-word reg-reg op for longs: $primary opcode + ModRM
enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// High-word reg-reg op for longs: $secondary opcode + ModRM of the high halves
enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
  $$$emit8$secondary;
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
%}

// As RegReg_Lo but without an opcode byte (caller emits the opcode)
enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// As RegReg_Hi but without an opcode byte (caller emits the opcode)
enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
%}

// ModRM pairing an int register with the high half of a long
enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
%}

enc_class Con32 (immI src) %{    // Con32(storeImmI)
  // Output immediate
  $$$emit32$src$$constant;
%}

enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
  // Output Float immediate bits
  jfloat jf = $src$$constant;
  int    jf_as_bits = jint_cast( jf );
  emit_d32(cbuf, jf_as_bits);
%}

enc_class Con32F_as_bits(immF src) %{      // storeX_imm
  // Output Float immediate bits
  jfloat jf = $src$$constant;
  int    jf_as_bits = jint_cast( jf );
  emit_d32(cbuf, jf_as_bits);
%}

enc_class Con16 (immI src) %{    // Con16(storeImmI)
  // Output immediate
  $$$emit16$src$$constant;
%}

enc_class Con_d32(immI src) %{
  // Emit the immediate as raw 32-bit data (no opcode).
  emit_d32(cbuf,$src$$constant);
%}

enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
  // Output immediate memory reference
  // mod=00, r/m=101: 32-bit absolute displacement form, disp == 0.
  emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
  emit_d32(cbuf, 0x00);
%}

enc_class lock_prefix( ) %{
  emit_opcode(cbuf,0xF0);         // [Lock]
%}

// Cmp-xchg long value.
// Note: we need to swap rbx, and rcx before and after the
//       cmpxchg8 instruction because the instruction uses
//       rcx as the high order word of the new value to store but
//       our register encoding uses rbx,.
enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
  // [Lock]
  emit_opcode(cbuf,0xF0);
  // CMPXCHG8 [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xC7);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  // XCHG  rbx,ecx  (restore the swapped registers)
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
%}

enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // CMPXCHG [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // CMPXCHGB [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB0);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // 16-bit mode
  emit_opcode(cbuf, 0x66);

  // CMPXCHGW [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// Materialize ZF!=0 as a boolean in 'res' without disturbing the flags
// (MOV imm does not affect flags).
enc_class enc_flags_ne_to_boolean( iRegI res ) %{
  int res_encoding = $res$$reg;

  // MOV  res,0
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 0 );
  // JNE,s  fail   (skips the 5-byte MOV res,1 below)
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 5 );
  // MOV  res,1
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 1 );
  // fail:
%}

enc_class set_instruction_start( ) %{
  cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
%}

enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = $ereg$$reg;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc();
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp + 4;      // Offset is 4 further in memory
  assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
%}

// Long shift by 1..31: double shift (0x0F $tertiary) across the halves,
// then shift ($primary/$secondary) of the remaining half.  $tertiary
// selects the direction (0xA4 shifts left, otherwise right).
enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
  int r1, r2;
  if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,$tertiary);
  emit_rm(cbuf, 0x3, r1, r2);
  emit_d8(cbuf,$cnt$$constant);
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, r1);
  emit_d8(cbuf,$cnt$$constant);
%}

// Arithmetic right shift of a long by 32..63.
enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
  emit_opcode( cbuf, 0x8B ); // Move
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
  emit_d8(cbuf,31);
%}

// Logical shift of a long by 32..63: move one half across, shift the
// remainder, zero the vacated half.  $secondary==0x5 means shift right.
enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
  int r1, r2;
  if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

  emit_opcode( cbuf, 0x8B ); // Move r1,r2
  emit_rm(cbuf, 0x3, r1, r2);
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_opcode(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_opcode(cbuf,0x33); // XOR r2,r2
  emit_rm(cbuf, 0x3, r2, r2);
%}

// Clone of RegMem but accepts an extra parameter to access each
// half of a double in memory; it never needs relocation info.
enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
  emit_opcode(cbuf,$opcode$$constant);
  int reg_encoding = $rm_reg$$reg;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp + $disp_for_half$$constant;
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
//
// Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
// and it never needs relocation information.
// Frequently used to move data between FPU's Stack Top and memory.
enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
%}

enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

// LEA dst, [src0 + src1] encoded as base register plus displacement.
enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
  int reg_encoding = $dst$$reg;
  int base  = $src0$$reg;      // 0xFFFFFFFF indicates no base
  int index = 0x04;            // 0x04 indicates no index
  int scale = 0x00;            // 0x00 indicates no scale
  int displace = $src1$$constant; // 0x00 indicates no displacement
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst < src around move
  emit_opcode(cbuf,0x7C);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst > src around move
  emit_opcode(cbuf,0x7F);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class enc_FPR_store(memory mem, regDPR src) %{
  // If src is FPR1, we can just FST to store it.
  // Else we need to FLD it to FPR1, then FSTP to store/pop it.
  int reg_encoding = 0x2; // Just store
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  if( $src$$reg != FPR1L_enc ) {
    reg_encoding = 0x3;  // Store & pop
    emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
  }
  cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
  emit_opcode(cbuf,$primary);
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class neg_reg(rRegI dst) %{
  // NEG $dst
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
%}

enc_class setLT_reg(eCXRegI dst) %{
  // SETLT $dst
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0x9C);
  emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
%}

// Branch-free conditional add: p += (p < q) ? y : 0, using SBB to
// materialize an all-ones/all-zeros mask in tmp.
enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
  int tmpReg = $tmp$$reg;

  // SUB $p,$q
  emit_opcode(cbuf,0x2B);
  emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
  // SBB $tmp,$tmp
  emit_opcode(cbuf,0x1B);
  emit_rm(cbuf, 0x3, tmpReg, tmpReg);
  // AND $tmp,$y
  emit_opcode(cbuf,0x23);
  emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
  // ADD $p,$tmp
  emit_opcode(cbuf,0x03);
  emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
%}

// Variable 64-bit left shift by CL; handles counts >= 32 by moving the
// low half into the high half first.
enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.hi,$dst.lo
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // CLR $dst.lo
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
  // SHLD $dst.hi,$dst.lo,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xA5);
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  // SHL  $dst.lo,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
%}

// Variable 64-bit logical right shift by CL.
enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // CLR $dst.hi
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SHR  $dst.hi,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
%}

// Variable 64-bit arithmetic right shift by CL; for counts >= 32 the
// high half is sign-filled with SAR 31.
enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x05);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // SAR $dst.hi,31
  emit_opcode(cbuf, 0xC1);
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
  emit_d8(cbuf, 0x1F );
// small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SAR  $dst.hi,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
%}


// ----------------- Encodings for floating point unit -----------------
// May leave result in FPU-TOS or FPU reg depending on opcodes
enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $src$$reg );
%}

// Pop argument in FPR0 with FSTP ST(0)
enc_class PopFPU() %{
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, 0xD8 );
%}

// !!!!! equivalent to Pop_Reg_F
enc_class Pop_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class Push_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
%}

// Multiply TOS by a scaling constant to push subnormals into range
// (strictfp handling); pairs with strictfp_bias2.
enc_class strictfp_bias1( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Inverse scaling of strictfp_bias1.
enc_class strictfp_bias2( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Special case for moving an integer register to a stack slot.
enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
%}

// Special case for moving a register to a stack slot.
enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  // Opcode already emitted
  emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
  emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
  emit_d32(cbuf, $dst$$disp);                  // Displacement
%}

// Push the integer in stackSlot 'src' onto FP-stack
enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
  store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
%}

// Push FPU's TOS float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
  store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
%}

// Same as Pop_Mem_F except for opcode
// Push FPU's TOS double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
  store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
%}

enc_class Pop_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class Push_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
%}

// Push FPU's float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
  int pop = 0x02;                      // FST (no pop) when src is already TOS
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;                        // FSTP: pop the copy we just pushed
  }
  store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
%}

// Push FPU's double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
  int pop = 0x02;
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;
  }
  store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
%}

// Push FPU's double to a FPU-stack-slot, and pop FPU-stack
enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
  int pop = 0xD0 - 1; // -1 since we skip FLD
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0xD8;
  }
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
%}


enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
  // load dst in FPR0
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // swap src with FPR1:
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
%}

// Spill two XMM doubles through the stack onto the x87 stack
// (src1 pushed first, so src0 ends up on top).
enc_class Push_ModD_encoding(regD src0, regD src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_d(Address(rsp, 0));
  __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Float variant of Push_ModD_encoding.
enc_class Push_ModF_encoding(regF src0, regF src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 4);
  __ movflt(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_s(Address(rsp, 0));
  __ movflt(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_s(Address(rsp, 0));
%}

// Move x87 TOS result back into an XMM double and release the temp slot.
enc_class Push_ResultD(regD dst) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_d(Address(rsp, 0));
  __ movdbl($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, 8);
%}

// Float variant; 'd8' is the number of temp-stack bytes to release.
enc_class Push_ResultF(regF dst, immI d8) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_s(Address(rsp, 0));
  __ movflt($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, $d8$$constant);
%}

// Push an XMM double onto the x87 stack via a fresh stack temp.
enc_class Push_SrcD(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

enc_class push_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
%}

enc_class pop_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ addptr(rsp, 8);
%}

// As Push_SrcD but reuses an already-reserved stack temp.
enc_class push_xmm_to_fpr1(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

enc_class Push_Result_Mod_DPR( regDPR src) %{
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
  // // following asm replaced with Pop_Reg_F or Pop_Mem_F
  // // FSTP   FPR$dst$$reg
  // emit_opcode( cbuf, 0xDD );
  // emit_d8( cbuf, 0xD8+$dst$$reg );
%}

// Copy FPU status to EFLAGS and skip ahead when parity is clear
// (PF set means an unordered compare).
enc_class fnstsw_sahf_skip_parity() %{
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jnp  ::skip
  emit_opcode( cbuf, 0x7B );
  emit_opcode( cbuf, 0x05 );
%}

enc_class emitModDPR() %{
  // fprem must be iterative
  // :: loop
  // fprem
  emit_opcode( cbuf, 0xD9 );
  emit_opcode( cbuf, 0xF8 );
  // wait
  emit_opcode( cbuf, 0x9b );
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jp ::loop
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0x8A );
  emit_opcode( cbuf, 0xF4 );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
%}

enc_class fpu_flags() %{
  // fnstsw_ax
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // test ax,0x0400
  emit_opcode( cbuf,
0x66 ); // operand-size prefix for 16-bit immediate 2667 emit_opcode( cbuf, 0xA9 ); 2668 emit_d16 ( cbuf, 0x0400 ); 2669 // // // This sequence works, but stalls for 12-16 cycles on PPro 2670 // // test rax,0x0400 2671 // emit_opcode( cbuf, 0xA9 ); 2672 // emit_d32 ( cbuf, 0x00000400 ); 2673 // 2674 // jz exit (no unordered comparison) 2675 emit_opcode( cbuf, 0x74 ); 2676 emit_d8 ( cbuf, 0x02 ); 2677 // mov ah,1 - treat as LT case (set carry flag) 2678 emit_opcode( cbuf, 0xB4 ); 2679 emit_d8 ( cbuf, 0x01 ); 2680 // sahf 2681 emit_opcode( cbuf, 0x9E); 2682 %} 2683 2684 enc_class cmpF_P6_fixup() %{ 2685 // Fixup the integer flags in case comparison involved a NaN 2686 // 2687 // JNP exit (no unordered comparison, P-flag is set by NaN) 2688 emit_opcode( cbuf, 0x7B ); 2689 emit_d8 ( cbuf, 0x03 ); 2690 // MOV AH,1 - treat as LT case (set carry flag) 2691 emit_opcode( cbuf, 0xB4 ); 2692 emit_d8 ( cbuf, 0x01 ); 2693 // SAHF 2694 emit_opcode( cbuf, 0x9E); 2695 // NOP // target for branch to avoid branch to branch 2696 emit_opcode( cbuf, 0x90); 2697 %} 2698 2699 // fnstsw_ax(); 2700 // sahf(); 2701 // movl(dst, nan_result); 2702 // jcc(Assembler::parity, exit); 2703 // movl(dst, less_result); 2704 // jcc(Assembler::below, exit); 2705 // movl(dst, equal_result); 2706 // jcc(Assembler::equal, exit); 2707 // movl(dst, greater_result); 2708 2709 // less_result = 1; 2710 // greater_result = -1; 2711 // equal_result = 0; 2712 // nan_result = -1; 2713 2714 enc_class CmpF_Result(rRegI dst) %{ 2715 // fnstsw_ax(); 2716 emit_opcode( cbuf, 0xDF); 2717 emit_opcode( cbuf, 0xE0); 2718 // sahf 2719 emit_opcode( cbuf, 0x9E); 2720 // movl(dst, nan_result); 2721 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2722 emit_d32( cbuf, -1 ); 2723 // jcc(Assembler::parity, exit); 2724 emit_opcode( cbuf, 0x7A ); 2725 emit_d8 ( cbuf, 0x13 ); 2726 // movl(dst, less_result); 2727 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2728 emit_d32( cbuf, -1 ); 2729 // jcc(Assembler::below, exit); 2730 emit_opcode( cbuf, 0x72 ); 
2731 emit_d8 ( cbuf, 0x0C ); 2732 // movl(dst, equal_result); 2733 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2734 emit_d32( cbuf, 0 ); 2735 // jcc(Assembler::equal, exit); 2736 emit_opcode( cbuf, 0x74 ); 2737 emit_d8 ( cbuf, 0x05 ); 2738 // movl(dst, greater_result); 2739 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2740 emit_d32( cbuf, 1 ); 2741 %} 2742 2743 2744 // Compare the longs and set flags 2745 // BROKEN! Do Not use as-is 2746 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2747 // CMP $src1.hi,$src2.hi 2748 emit_opcode( cbuf, 0x3B ); 2749 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2750 // JNE,s done 2751 emit_opcode(cbuf,0x75); 2752 emit_d8(cbuf, 2 ); 2753 // CMP $src1.lo,$src2.lo 2754 emit_opcode( cbuf, 0x3B ); 2755 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2756 // done: 2757 %} 2758 2759 enc_class convert_int_long( regL dst, rRegI src ) %{ 2760 // mov $dst.lo,$src 2761 int dst_encoding = $dst$$reg; 2762 int src_encoding = $src$$reg; 2763 encode_Copy( cbuf, dst_encoding , src_encoding ); 2764 // mov $dst.hi,$src 2765 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2766 // sar $dst.hi,31 2767 emit_opcode( cbuf, 0xC1 ); 2768 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2769 emit_d8(cbuf, 0x1F ); 2770 %} 2771 2772 enc_class convert_long_double( eRegL src ) %{ 2773 // push $src.hi 2774 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2775 // push $src.lo 2776 emit_opcode(cbuf, 0x50+$src$$reg ); 2777 // fild 64-bits at [SP] 2778 emit_opcode(cbuf,0xdf); 2779 emit_d8(cbuf, 0x6C); 2780 emit_d8(cbuf, 0x24); 2781 emit_d8(cbuf, 0x00); 2782 // pop stack 2783 emit_opcode(cbuf, 0x83); // add SP, #8 2784 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2785 emit_d8(cbuf, 0x8); 2786 %} 2787 2788 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2789 // IMUL EDX:EAX,$src1 2790 emit_opcode( cbuf, 0xF7 ); 2791 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2792 // SAR 
EDX,$cnt-32 2793 int shift_count = ((int)$cnt$$constant) - 32; 2794 if (shift_count > 0) { 2795 emit_opcode(cbuf, 0xC1); 2796 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2797 emit_d8(cbuf, shift_count); 2798 } 2799 %} 2800 2801 // this version doesn't have add sp, 8 2802 enc_class convert_long_double2( eRegL src ) %{ 2803 // push $src.hi 2804 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2805 // push $src.lo 2806 emit_opcode(cbuf, 0x50+$src$$reg ); 2807 // fild 64-bits at [SP] 2808 emit_opcode(cbuf,0xdf); 2809 emit_d8(cbuf, 0x6C); 2810 emit_d8(cbuf, 0x24); 2811 emit_d8(cbuf, 0x00); 2812 %} 2813 2814 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2815 // Basic idea: long = (long)int * (long)int 2816 // IMUL EDX:EAX, src 2817 emit_opcode( cbuf, 0xF7 ); 2818 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2819 %} 2820 2821 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2822 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2823 // MUL EDX:EAX, src 2824 emit_opcode( cbuf, 0xF7 ); 2825 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2826 %} 2827 2828 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2829 // Basic idea: lo(result) = lo(x_lo * y_lo) 2830 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2831 // MOV $tmp,$src.lo 2832 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2833 // IMUL $tmp,EDX 2834 emit_opcode( cbuf, 0x0F ); 2835 emit_opcode( cbuf, 0xAF ); 2836 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2837 // MOV EDX,$src.hi 2838 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2839 // IMUL EDX,EAX 2840 emit_opcode( cbuf, 0x0F ); 2841 emit_opcode( cbuf, 0xAF ); 2842 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2843 // ADD $tmp,EDX 2844 emit_opcode( cbuf, 0x03 ); 2845 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2846 // MUL EDX:EAX,$src.lo 2847 emit_opcode( cbuf, 0xF7 ); 2848 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2849 // ADD EDX,ESI 2850 emit_opcode( 
cbuf, 0x03 ); 2851 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2852 %} 2853 2854 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2855 // Basic idea: lo(result) = lo(src * y_lo) 2856 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2857 // IMUL $tmp,EDX,$src 2858 emit_opcode( cbuf, 0x6B ); 2859 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2860 emit_d8( cbuf, (int)$src$$constant ); 2861 // MOV EDX,$src 2862 emit_opcode(cbuf, 0xB8 + EDX_enc); 2863 emit_d32( cbuf, (int)$src$$constant ); 2864 // MUL EDX:EAX,EDX 2865 emit_opcode( cbuf, 0xF7 ); 2866 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2867 // ADD EDX,ESI 2868 emit_opcode( cbuf, 0x03 ); 2869 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2870 %} 2871 2872 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2873 // PUSH src1.hi 2874 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2875 // PUSH src1.lo 2876 emit_opcode(cbuf, 0x50+$src1$$reg ); 2877 // PUSH src2.hi 2878 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2879 // PUSH src2.lo 2880 emit_opcode(cbuf, 0x50+$src2$$reg ); 2881 // CALL directly to the runtime 2882 cbuf.set_insts_mark(); 2883 emit_opcode(cbuf,0xE8); // Call into runtime 2884 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2885 // Restore stack 2886 emit_opcode(cbuf, 0x83); // add SP, #framesize 2887 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2888 emit_d8(cbuf, 4*4); 2889 %} 2890 2891 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2892 // PUSH src1.hi 2893 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2894 // PUSH src1.lo 2895 emit_opcode(cbuf, 0x50+$src1$$reg ); 2896 // PUSH src2.hi 2897 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2898 // PUSH src2.lo 2899 emit_opcode(cbuf, 0x50+$src2$$reg ); 2900 // CALL directly to the runtime 2901 cbuf.set_insts_mark(); 2902 emit_opcode(cbuf,0xE8); // Call into runtime 2903 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2904 // Restore stack 2905 emit_opcode(cbuf, 0x83); // add SP, #framesize 2906 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2907 emit_d8(cbuf, 4*4); 2908 %} 2909 2910 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2911 // MOV $tmp,$src.lo 2912 emit_opcode(cbuf, 0x8B); 2913 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2914 // OR $tmp,$src.hi 2915 emit_opcode(cbuf, 0x0B); 2916 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2917 %} 2918 2919 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2920 // CMP $src1.lo,$src2.lo 2921 emit_opcode( cbuf, 0x3B ); 2922 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2923 // JNE,s skip 2924 emit_cc(cbuf, 0x70, 0x5); 2925 emit_d8(cbuf,2); 2926 // CMP $src1.hi,$src2.hi 2927 emit_opcode( cbuf, 0x3B ); 2928 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2929 %} 2930 2931 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2932 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2933 emit_opcode( cbuf, 0x3B ); 2934 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2935 // MOV $tmp,$src1.hi 2936 emit_opcode( cbuf, 0x8B ); 2937 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2938 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2939 emit_opcode( cbuf, 0x1B ); 2940 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2941 %} 2942 2943 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2944 // XOR $tmp,$tmp 2945 emit_opcode(cbuf,0x33); // XOR 2946 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2947 // CMP $tmp,$src.lo 2948 emit_opcode( cbuf, 0x3B ); 2949 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2950 // SBB $tmp,$src.hi 2951 emit_opcode( cbuf, 0x1B ); 2952 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2953 %} 2954 2955 // Sniff, sniff... 
// ...smells like Gnu Superoptimizer
  // Negate a 64-bit register pair: NEG both halves, then SBB the borrow
  // from the low half out of the high half.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3, $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  // POP EDX (discard one 32-bit stack word into EDX).
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}

  // Tail-jump to the rethrow stub.
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);         // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);         // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);         // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);         // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);         // POP EAX
    // 0x80000000 is the hardware's "invalid" marker for out-of-range /
    // NaN inputs; only then take the slow path.
    emit_opcode(cbuf,0x3D);         // CMP EAX,imm
    emit_d32   (cbuf,0x80000000);   //         0x80000000
    emit_opcode(cbuf,0x75);         // JNE around_slow_call
    emit_d8    (cbuf,0x07);         // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );        // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Same scheme as DPR2I_encoding but converting to a 64-bit long;
  // the invalid marker is EDX==0x80000000 with EAX==0.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);         // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);         // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);         // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);         // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);         // POP EAX
    emit_opcode(cbuf,0x5A);         // POP EDX
    emit_opcode(cbuf,0x81);         // CMP EDX,imm
    emit_d8    (cbuf,0xFA);         // rdx
    emit_d32   (cbuf,0x80000000);   //         0x80000000
    emit_opcode(cbuf,0x75);         // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);       // Size of slow_call
    emit_opcode(cbuf,0x85);         // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);         // 2/rax,/rax,
    emit_opcode(cbuf,0x75);         // JNE around_slow_call
    emit_d8    (cbuf,0x07);         // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );        // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADDP  ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  // Fused subtract-then-divide against ST(0).
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  // Fused add-then-multiply against ST(0).
  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  // As MulFAddF but the multiply pops into src2 (FMULP).
  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  // (64-bit FILD from memory, then FISTP into the destination stack slot).
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable. Unfortunately, it kills the condition code
  // in the process
  // We current use TESTL [spp],EDI
  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0

  enc_class Safepoint_Poll() %{
    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
    emit_opcode(cbuf,0x85);             // TESTL [disp32],EDI
    emit_rm (cbuf, 0x0, 0x7, 0x5);
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  %}
%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   | (to get allocators register number
//  G  Owned by |        |           v    add OptoReg::stack0())
//  r   CALLER  |        |
//  o     |     +--------+      pad to even-align allocators stack-slot
//  w     V     |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^     |   in   |  5
//        |     |  args  |  4   Holes in incoming args owned by SELF
//        |     |        |  3
//        |     +--------+
//        V     | old out|      Empty on Intel, window on Sparc
//        | old |preserve|      Must be even aligned.
//        | SP-+--------+----> Matcher::_old_SP, even aligned
//        |     |   in   |  3   area for Intel ret address
//     Owned by |preserve|      Empty on Sparc.
//       SELF   +--------+
//        |     |  pad2  |  2   pad to align old SP
//        |     +--------+  1
//        |     | locks  |  0
//        |     +--------+----> OptoReg::stack0(), even aligned
//        |     |  pad1  | 11   pad to align new SP
//        |     +--------+
//        |     |        | 10
//        |     | spills |  9   spills
//        V     |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^     |  out   |  7
//        |     |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by +--------+
//       CALLEE | new out|  6   Empty on Intel, window on Sparc
//        | new |preserve|      Must be even-aligned.
//        | SP-+--------+----> Matcher::_new_SP, even aligned
//        |     |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be nessecary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be nessecary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // NOTE(review): unlike c_return_value, Java float results use XMM0 already
  // at UseSSE>=1 (C float results need UseSSE>=2); this asymmetry appears
  // deliberate — confirm against the calling-convention code before changing.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate that fits in a signed byte (8-bit instruction forms)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 16-bit immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in the low word of a long shift (1..31)
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count that moves bits across the long's word boundary (32..63)
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant 1
operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant 2
operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant 3
operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: -1 (all bits set)
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value representable as a sign-extended 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 path, UseSSE<=1)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 path)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand
immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero (x87 path)
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one (x87 path)
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 path)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE path)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
// (jint_cast distinguishes +0.0 from -0.0 by bit pattern)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register: any allocatable 32-bit integer register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register (registers with byte-addressable subregisters)
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand
eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register excluding EAX
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding EAX and EDX
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding ECX
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(esi_reg));
   match(reg);
   match(rRegI);

   format %{ "ESI" %}
   interface(REG_INTER);
%}

// Pointer Register: any register, including ones the matcher otherwise avoids
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX and EBX
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long register: an adjacent even/odd-style pair of 32-bit registers
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// Never matched directly (predicate(false)); selected only through explicit
// instruction rules that know the unordered case needs no fixup.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands (x87 stack registers, UseSSE < 2)
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
// Float register operands
operand vlRegF() %{
   constraint(ALLOC_IN_RC(float_reg_vl));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand vlRegD() %{
   constraint(ALLOC_IN_RC(double_reg_vl));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}

// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand (absolute address from a constant pointer)
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// (constant pointer as displacement, integer register as base)
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" with out a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


opclass legRegF (regF);
opclass legRegD (regD);



//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
// Encodings are the x86 condition-code nibbles used in Jcc/SETcc.
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
// NOTE(review): the encodings here appear to be FCMOVcc opcode bytes rather
// than condition-code nibbles — confirm against the encoding blocks that use
// this operand.
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
// (conditions are swapped relative to cmpOp: operands were commuted)
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable-size instructions (x86 encoding)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation
// NOTE(review): second parameter is declared `memory src` despite the
// reg-reg name/comment — confirm against instructions that use this class.
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation
pipe_class ialu_cr_reg_reg(eFlagsReg cr,
rRegI src1, rRegI src2) %{ 4897 single_instruction; 4898 cr : S4(write); 4899 src1 : S3(read); 4900 src2 : S3(read); 4901 DECODE : S0; // any decoder 4902 ALU : S3; // any alu 4903 %} 4904 4905 // Integer ALU reg-imm operation 4906 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4907 single_instruction; 4908 cr : S4(write); 4909 src1 : S3(read); 4910 DECODE : S0; // any decoder 4911 ALU : S3; // any alu 4912 %} 4913 4914 // Integer ALU reg-mem operation 4915 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4916 single_instruction; 4917 cr : S4(write); 4918 src1 : S3(read); 4919 src2 : S3(read); 4920 D0 : S0; // big decoder only 4921 ALU : S4; // any alu 4922 MEM : S3; 4923 %} 4924 4925 // Conditional move reg-reg 4926 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4927 instruction_count(4); 4928 y : S4(read); 4929 q : S3(read); 4930 p : S3(read); 4931 DECODE : S0(4); // any decoder 4932 %} 4933 4934 // Conditional move reg-reg 4935 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4936 single_instruction; 4937 dst : S4(write); 4938 src : S3(read); 4939 cr : S3(read); 4940 DECODE : S0; // any decoder 4941 %} 4942 4943 // Conditional move reg-mem 4944 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4945 single_instruction; 4946 dst : S4(write); 4947 src : S3(read); 4948 cr : S3(read); 4949 DECODE : S0; // any decoder 4950 MEM : S3; 4951 %} 4952 4953 // Conditional move reg-reg long 4954 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4955 single_instruction; 4956 dst : S4(write); 4957 src : S3(read); 4958 cr : S3(read); 4959 DECODE : S0(2); // any 2 decoders 4960 %} 4961 4962 // Conditional move double reg-reg 4963 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4964 single_instruction; 4965 dst : S4(write); 4966 src : S3(read); 4967 cr : S3(read); 4968 DECODE : S0; // any decoder 4969 %} 4970 4971 // Float reg-reg operation 4972 pipe_class fpu_reg(regDPR 
dst) %{ 4973 instruction_count(2); 4974 dst : S3(read); 4975 DECODE : S0(2); // any 2 decoders 4976 FPU : S3; 4977 %} 4978 4979 // Float reg-reg operation 4980 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4981 instruction_count(2); 4982 dst : S4(write); 4983 src : S3(read); 4984 DECODE : S0(2); // any 2 decoders 4985 FPU : S3; 4986 %} 4987 4988 // Float reg-reg operation 4989 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4990 instruction_count(3); 4991 dst : S4(write); 4992 src1 : S3(read); 4993 src2 : S3(read); 4994 DECODE : S0(3); // any 3 decoders 4995 FPU : S3(2); 4996 %} 4997 4998 // Float reg-reg operation 4999 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 5000 instruction_count(4); 5001 dst : S4(write); 5002 src1 : S3(read); 5003 src2 : S3(read); 5004 src3 : S3(read); 5005 DECODE : S0(4); // any 3 decoders 5006 FPU : S3(2); 5007 %} 5008 5009 // Float reg-reg operation 5010 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 5011 instruction_count(4); 5012 dst : S4(write); 5013 src1 : S3(read); 5014 src2 : S3(read); 5015 src3 : S3(read); 5016 DECODE : S1(3); // any 3 decoders 5017 D0 : S0; // Big decoder only 5018 FPU : S3(2); 5019 MEM : S3; 5020 %} 5021 5022 // Float reg-mem operation 5023 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 5024 instruction_count(2); 5025 dst : S5(write); 5026 mem : S3(read); 5027 D0 : S0; // big decoder only 5028 DECODE : S1; // any decoder for FPU POP 5029 FPU : S4; 5030 MEM : S3; // any mem 5031 %} 5032 5033 // Float reg-mem operation 5034 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 5035 instruction_count(3); 5036 dst : S5(write); 5037 src1 : S3(read); 5038 mem : S3(read); 5039 D0 : S0; // big decoder only 5040 DECODE : S1(2); // any decoder for FPU POP 5041 FPU : S4; 5042 MEM : S3; // any mem 5043 %} 5044 5045 // Float mem-reg operation 5046 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 5047 
instruction_count(2); 5048 src : S5(read); 5049 mem : S3(read); 5050 DECODE : S0; // any decoder for FPU PUSH 5051 D0 : S1; // big decoder only 5052 FPU : S4; 5053 MEM : S3; // any mem 5054 %} 5055 5056 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 5057 instruction_count(3); 5058 src1 : S3(read); 5059 src2 : S3(read); 5060 mem : S3(read); 5061 DECODE : S0(2); // any decoder for FPU PUSH 5062 D0 : S1; // big decoder only 5063 FPU : S4; 5064 MEM : S3; // any mem 5065 %} 5066 5067 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 5068 instruction_count(3); 5069 src1 : S3(read); 5070 src2 : S3(read); 5071 mem : S4(read); 5072 DECODE : S0; // any decoder for FPU PUSH 5073 D0 : S0(2); // big decoder only 5074 FPU : S4; 5075 MEM : S3(2); // any mem 5076 %} 5077 5078 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 5079 instruction_count(2); 5080 src1 : S3(read); 5081 dst : S4(read); 5082 D0 : S0(2); // big decoder only 5083 MEM : S3(2); // any mem 5084 %} 5085 5086 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 5087 instruction_count(3); 5088 src1 : S3(read); 5089 src2 : S3(read); 5090 dst : S4(read); 5091 D0 : S0(3); // big decoder only 5092 FPU : S4; 5093 MEM : S3(3); // any mem 5094 %} 5095 5096 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 5097 instruction_count(3); 5098 src1 : S4(read); 5099 mem : S4(read); 5100 DECODE : S0; // any decoder for FPU PUSH 5101 D0 : S0(2); // big decoder only 5102 FPU : S4; 5103 MEM : S3(2); // any mem 5104 %} 5105 5106 // Float load constant 5107 pipe_class fpu_reg_con(regDPR dst) %{ 5108 instruction_count(2); 5109 dst : S5(write); 5110 D0 : S0; // big decoder only for the load 5111 DECODE : S1; // any decoder for FPU POP 5112 FPU : S4; 5113 MEM : S3; // any mem 5114 %} 5115 5116 // Float load constant 5117 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5118 instruction_count(3); 5119 dst : S5(write); 5120 src : S3(read); 5121 D0 : S0; // big decoder only for 
the load 5122 DECODE : S1(2); // any decoder for FPU POP 5123 FPU : S4; 5124 MEM : S3; // any mem 5125 %} 5126 5127 // UnConditional branch 5128 pipe_class pipe_jmp( label labl ) %{ 5129 single_instruction; 5130 BR : S3; 5131 %} 5132 5133 // Conditional branch 5134 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5135 single_instruction; 5136 cr : S1(read); 5137 BR : S3; 5138 %} 5139 5140 // Allocation idiom 5141 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5142 instruction_count(1); force_serialization; 5143 fixed_latency(6); 5144 heap_ptr : S3(read); 5145 DECODE : S0(3); 5146 D0 : S2; 5147 MEM : S3; 5148 ALU : S3(2); 5149 dst : S5(write); 5150 BR : S5; 5151 %} 5152 5153 // Generic big/slow expanded idiom 5154 pipe_class pipe_slow( ) %{ 5155 instruction_count(10); multiple_bundles; force_serialization; 5156 fixed_latency(100); 5157 D0 : S0(2); 5158 MEM : S3(2); 5159 %} 5160 5161 // The real do-nothing guy 5162 pipe_class empty( ) %{ 5163 instruction_count(0); 5164 %} 5165 5166 // Define the class for the Nop node 5167 define %{ 5168 MachNop = empty; 5169 %} 5170 5171 %} 5172 5173 //----------INSTRUCTIONS------------------------------------------------------- 5174 // 5175 // match -- States which machine-independent subtree may be replaced 5176 // by this instruction. 5177 // ins_cost -- The estimated cost of this instruction is used by instruction 5178 // selection to identify a minimum cost tree of machine 5179 // instructions that matches a tree of machine-independent 5180 // instructions. 5181 // format -- A string providing the disassembly for this instruction. 5182 // The value of an instruction's operand may be inserted 5183 // by referring to it with a '$' prefix. 5184 // opcode -- Three instruction opcodes may be provided. These are referred 5185 // to within an encode class as $primary, $secondary, and $tertiary 5186 // respectively. 
The primary opcode is commonly used to 5187 // indicate the type of machine instruction, while secondary 5188 // and tertiary are often used for prefix options or addressing 5189 // modes. 5190 // ins_encode -- A list of encode classes with parameters. The encode class 5191 // name must have been defined in an 'enc_class' specification 5192 // in the encode section of the architecture description. 5193 5194 //----------BSWAP-Instruction-------------------------------------------------- 5195 instruct bytes_reverse_int(rRegI dst) %{ 5196 match(Set dst (ReverseBytesI dst)); 5197 5198 format %{ "BSWAP $dst" %} 5199 opcode(0x0F, 0xC8); 5200 ins_encode( OpcP, OpcSReg(dst) ); 5201 ins_pipe( ialu_reg ); 5202 %} 5203 5204 instruct bytes_reverse_long(eRegL dst) %{ 5205 match(Set dst (ReverseBytesL dst)); 5206 5207 format %{ "BSWAP $dst.lo\n\t" 5208 "BSWAP $dst.hi\n\t" 5209 "XCHG $dst.lo $dst.hi" %} 5210 5211 ins_cost(125); 5212 ins_encode( bswap_long_bytes(dst) ); 5213 ins_pipe( ialu_reg_reg); 5214 %} 5215 5216 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5217 match(Set dst (ReverseBytesUS dst)); 5218 effect(KILL cr); 5219 5220 format %{ "BSWAP $dst\n\t" 5221 "SHR $dst,16\n\t" %} 5222 ins_encode %{ 5223 __ bswapl($dst$$Register); 5224 __ shrl($dst$$Register, 16); 5225 %} 5226 ins_pipe( ialu_reg ); 5227 %} 5228 5229 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5230 match(Set dst (ReverseBytesS dst)); 5231 effect(KILL cr); 5232 5233 format %{ "BSWAP $dst\n\t" 5234 "SAR $dst,16\n\t" %} 5235 ins_encode %{ 5236 __ bswapl($dst$$Register); 5237 __ sarl($dst$$Register, 16); 5238 %} 5239 ins_pipe( ialu_reg ); 5240 %} 5241 5242 5243 //---------- Zeros Count Instructions ------------------------------------------ 5244 5245 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5246 predicate(UseCountLeadingZerosInstruction); 5247 match(Set dst (CountLeadingZerosI src)); 5248 effect(KILL cr); 5249 5250 format %{ "LZCNT $dst, $src\t# count 
leading zeros (int)" %} 5251 ins_encode %{ 5252 __ lzcntl($dst$$Register, $src$$Register); 5253 %} 5254 ins_pipe(ialu_reg); 5255 %} 5256 5257 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5258 predicate(!UseCountLeadingZerosInstruction); 5259 match(Set dst (CountLeadingZerosI src)); 5260 effect(KILL cr); 5261 5262 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5263 "JNZ skip\n\t" 5264 "MOV $dst, -1\n" 5265 "skip:\n\t" 5266 "NEG $dst\n\t" 5267 "ADD $dst, 31" %} 5268 ins_encode %{ 5269 Register Rdst = $dst$$Register; 5270 Register Rsrc = $src$$Register; 5271 Label skip; 5272 __ bsrl(Rdst, Rsrc); 5273 __ jccb(Assembler::notZero, skip); 5274 __ movl(Rdst, -1); 5275 __ bind(skip); 5276 __ negl(Rdst); 5277 __ addl(Rdst, BitsPerInt - 1); 5278 %} 5279 ins_pipe(ialu_reg); 5280 %} 5281 5282 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5283 predicate(UseCountLeadingZerosInstruction); 5284 match(Set dst (CountLeadingZerosL src)); 5285 effect(TEMP dst, KILL cr); 5286 5287 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5288 "JNC done\n\t" 5289 "LZCNT $dst, $src.lo\n\t" 5290 "ADD $dst, 32\n" 5291 "done:" %} 5292 ins_encode %{ 5293 Register Rdst = $dst$$Register; 5294 Register Rsrc = $src$$Register; 5295 Label done; 5296 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5297 __ jccb(Assembler::carryClear, done); 5298 __ lzcntl(Rdst, Rsrc); 5299 __ addl(Rdst, BitsPerInt); 5300 __ bind(done); 5301 %} 5302 ins_pipe(ialu_reg); 5303 %} 5304 5305 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5306 predicate(!UseCountLeadingZerosInstruction); 5307 match(Set dst (CountLeadingZerosL src)); 5308 effect(TEMP dst, KILL cr); 5309 5310 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5311 "JZ msw_is_zero\n\t" 5312 "ADD $dst, 32\n\t" 5313 "JMP not_zero\n" 5314 "msw_is_zero:\n\t" 5315 "BSR $dst, $src.lo\n\t" 5316 "JNZ not_zero\n\t" 5317 "MOV $dst, -1\n" 5318 "not_zero:\n\t" 5319 "NEG 
$dst\n\t" 5320 "ADD $dst, 63\n" %} 5321 ins_encode %{ 5322 Register Rdst = $dst$$Register; 5323 Register Rsrc = $src$$Register; 5324 Label msw_is_zero; 5325 Label not_zero; 5326 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5327 __ jccb(Assembler::zero, msw_is_zero); 5328 __ addl(Rdst, BitsPerInt); 5329 __ jmpb(not_zero); 5330 __ bind(msw_is_zero); 5331 __ bsrl(Rdst, Rsrc); 5332 __ jccb(Assembler::notZero, not_zero); 5333 __ movl(Rdst, -1); 5334 __ bind(not_zero); 5335 __ negl(Rdst); 5336 __ addl(Rdst, BitsPerLong - 1); 5337 %} 5338 ins_pipe(ialu_reg); 5339 %} 5340 5341 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5342 predicate(UseCountTrailingZerosInstruction); 5343 match(Set dst (CountTrailingZerosI src)); 5344 effect(KILL cr); 5345 5346 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5347 ins_encode %{ 5348 __ tzcntl($dst$$Register, $src$$Register); 5349 %} 5350 ins_pipe(ialu_reg); 5351 %} 5352 5353 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5354 predicate(!UseCountTrailingZerosInstruction); 5355 match(Set dst (CountTrailingZerosI src)); 5356 effect(KILL cr); 5357 5358 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5359 "JNZ done\n\t" 5360 "MOV $dst, 32\n" 5361 "done:" %} 5362 ins_encode %{ 5363 Register Rdst = $dst$$Register; 5364 Label done; 5365 __ bsfl(Rdst, $src$$Register); 5366 __ jccb(Assembler::notZero, done); 5367 __ movl(Rdst, BitsPerInt); 5368 __ bind(done); 5369 %} 5370 ins_pipe(ialu_reg); 5371 %} 5372 5373 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5374 predicate(UseCountTrailingZerosInstruction); 5375 match(Set dst (CountTrailingZerosL src)); 5376 effect(TEMP dst, KILL cr); 5377 5378 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5379 "JNC done\n\t" 5380 "TZCNT $dst, $src.hi\n\t" 5381 "ADD $dst, 32\n" 5382 "done:" %} 5383 ins_encode %{ 5384 Register Rdst = $dst$$Register; 5385 Register Rsrc = $src$$Register; 5386 Label done; 5387 __ 
tzcntl(Rdst, Rsrc); 5388 __ jccb(Assembler::carryClear, done); 5389 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5390 __ addl(Rdst, BitsPerInt); 5391 __ bind(done); 5392 %} 5393 ins_pipe(ialu_reg); 5394 %} 5395 5396 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5397 predicate(!UseCountTrailingZerosInstruction); 5398 match(Set dst (CountTrailingZerosL src)); 5399 effect(TEMP dst, KILL cr); 5400 5401 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5402 "JNZ done\n\t" 5403 "BSF $dst, $src.hi\n\t" 5404 "JNZ msw_not_zero\n\t" 5405 "MOV $dst, 32\n" 5406 "msw_not_zero:\n\t" 5407 "ADD $dst, 32\n" 5408 "done:" %} 5409 ins_encode %{ 5410 Register Rdst = $dst$$Register; 5411 Register Rsrc = $src$$Register; 5412 Label msw_not_zero; 5413 Label done; 5414 __ bsfl(Rdst, Rsrc); 5415 __ jccb(Assembler::notZero, done); 5416 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5417 __ jccb(Assembler::notZero, msw_not_zero); 5418 __ movl(Rdst, BitsPerInt); 5419 __ bind(msw_not_zero); 5420 __ addl(Rdst, BitsPerInt); 5421 __ bind(done); 5422 %} 5423 ins_pipe(ialu_reg); 5424 %} 5425 5426 5427 //---------- Population Count Instructions ------------------------------------- 5428 5429 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5430 predicate(UsePopCountInstruction); 5431 match(Set dst (PopCountI src)); 5432 effect(KILL cr); 5433 5434 format %{ "POPCNT $dst, $src" %} 5435 ins_encode %{ 5436 __ popcntl($dst$$Register, $src$$Register); 5437 %} 5438 ins_pipe(ialu_reg); 5439 %} 5440 5441 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5442 predicate(UsePopCountInstruction); 5443 match(Set dst (PopCountI (LoadI mem))); 5444 effect(KILL cr); 5445 5446 format %{ "POPCNT $dst, $mem" %} 5447 ins_encode %{ 5448 __ popcntl($dst$$Register, $mem$$Address); 5449 %} 5450 ins_pipe(ialu_reg); 5451 %} 5452 5453 // Note: Long.bitCount(long) returns an int. 
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    // Count each 32-bit half separately and sum the counts.
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Count the two 32-bit halves straight from memory and sum the counts.
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); // high word is always zero
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 bits of the mask matter after the zero-extending byte load.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    // The shift pair keeps only the low byte, so a sign-extending byte load suffices.
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    // The shift pair keeps only the low byte, so a sign-extending byte load suffices.
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); // high word is always zero
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    // Masking with 0xFF keeps only the low byte, so a zero-extending byte load suffices.
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 bits of the mask matter after the zero-extending word load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // high word becomes the sign extension
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    // Masking with 0xFF keeps only the low byte, so a zero-extending byte load suffices.
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    // Masking with 0xFFFF keeps only the low word, so a zero-extending word load suffices.
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    // A 31-bit mask clears the sign bit, so the masked value is non-negative
    // and the high word of the long is always zero.
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); // high word is always zero
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads (low word first); only legal for non-atomic accesses.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    // An 8-byte XMM load/store gives the required atomicity.
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    // Atomic 8-byte XMM load, then split into the integer register pair.
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);  // bring the high word down into the low lane
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);  /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Double to XMM without clearing the upper half of the register
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);  /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);  /* + rd */
ins_encode( LdImmP(dst, src) ); 6081 ins_pipe( ialu_reg_fat ); 6082 %} 6083 6084 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ 6085 match(Set dst src); 6086 effect(KILL cr); 6087 ins_cost(200); 6088 format %{ "MOV $dst.lo,$src.lo\n\t" 6089 "MOV $dst.hi,$src.hi" %} 6090 opcode(0xB8); 6091 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); 6092 ins_pipe( ialu_reg_long_fat ); 6093 %} 6094 6095 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ 6096 match(Set dst src); 6097 effect(KILL cr); 6098 ins_cost(150); 6099 format %{ "XOR $dst.lo,$dst.lo\n\t" 6100 "XOR $dst.hi,$dst.hi" %} 6101 opcode(0x33,0x33); 6102 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); 6103 ins_pipe( ialu_reg_long ); 6104 %} 6105 6106 // The instruction usage is guarded by predicate in operand immFPR(). 6107 instruct loadConFPR(regFPR dst, immFPR con) %{ 6108 match(Set dst con); 6109 ins_cost(125); 6110 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 6111 "FSTP $dst" %} 6112 ins_encode %{ 6113 __ fld_s($constantaddress($con)); 6114 __ fstp_d($dst$$reg); 6115 %} 6116 ins_pipe(fpu_reg_con); 6117 %} 6118 6119 // The instruction usage is guarded by predicate in operand immFPR0(). 6120 instruct loadConFPR0(regFPR dst, immFPR0 con) %{ 6121 match(Set dst con); 6122 ins_cost(125); 6123 format %{ "FLDZ ST\n\t" 6124 "FSTP $dst" %} 6125 ins_encode %{ 6126 __ fldz(); 6127 __ fstp_d($dst$$reg); 6128 %} 6129 ins_pipe(fpu_reg_con); 6130 %} 6131 6132 // The instruction usage is guarded by predicate in operand immFPR1(). 6133 instruct loadConFPR1(regFPR dst, immFPR1 con) %{ 6134 match(Set dst con); 6135 ins_cost(125); 6136 format %{ "FLD1 ST\n\t" 6137 "FSTP $dst" %} 6138 ins_encode %{ 6139 __ fld1(); 6140 __ fstp_d($dst$$reg); 6141 %} 6142 ins_pipe(fpu_reg_con); 6143 %} 6144 6145 // The instruction usage is guarded by predicate in operand immF(). 
6146 instruct loadConF(regF dst, immF con) %{ 6147 match(Set dst con); 6148 ins_cost(125); 6149 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 6150 ins_encode %{ 6151 __ movflt($dst$$XMMRegister, $constantaddress($con)); 6152 %} 6153 ins_pipe(pipe_slow); 6154 %} 6155 6156 // The instruction usage is guarded by predicate in operand immF0(). 6157 instruct loadConF0(regF dst, immF0 src) %{ 6158 match(Set dst src); 6159 ins_cost(100); 6160 format %{ "XORPS $dst,$dst\t# float 0.0" %} 6161 ins_encode %{ 6162 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 6163 %} 6164 ins_pipe(pipe_slow); 6165 %} 6166 6167 // The instruction usage is guarded by predicate in operand immDPR(). 6168 instruct loadConDPR(regDPR dst, immDPR con) %{ 6169 match(Set dst con); 6170 ins_cost(125); 6171 6172 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 6173 "FSTP $dst" %} 6174 ins_encode %{ 6175 __ fld_d($constantaddress($con)); 6176 __ fstp_d($dst$$reg); 6177 %} 6178 ins_pipe(fpu_reg_con); 6179 %} 6180 6181 // The instruction usage is guarded by predicate in operand immDPR0(). 6182 instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 6183 match(Set dst con); 6184 ins_cost(125); 6185 6186 format %{ "FLDZ ST\n\t" 6187 "FSTP $dst" %} 6188 ins_encode %{ 6189 __ fldz(); 6190 __ fstp_d($dst$$reg); 6191 %} 6192 ins_pipe(fpu_reg_con); 6193 %} 6194 6195 // The instruction usage is guarded by predicate in operand immDPR1(). 6196 instruct loadConDPR1(regDPR dst, immDPR1 con) %{ 6197 match(Set dst con); 6198 ins_cost(125); 6199 6200 format %{ "FLD1 ST\n\t" 6201 "FSTP $dst" %} 6202 ins_encode %{ 6203 __ fld1(); 6204 __ fstp_d($dst$$reg); 6205 %} 6206 ins_pipe(fpu_reg_con); 6207 %} 6208 6209 // The instruction usage is guarded by predicate in operand immD(). 
6210 instruct loadConD(regD dst, immD con) %{ 6211 match(Set dst con); 6212 ins_cost(125); 6213 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 6214 ins_encode %{ 6215 __ movdbl($dst$$XMMRegister, $constantaddress($con)); 6216 %} 6217 ins_pipe(pipe_slow); 6218 %} 6219 6220 // The instruction usage is guarded by predicate in operand immD0(). 6221 instruct loadConD0(regD dst, immD0 src) %{ 6222 match(Set dst src); 6223 ins_cost(100); 6224 format %{ "XORPD $dst,$dst\t# double 0.0" %} 6225 ins_encode %{ 6226 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); 6227 %} 6228 ins_pipe( pipe_slow ); 6229 %} 6230 6231 // Load Stack Slot 6232 instruct loadSSI(rRegI dst, stackSlotI src) %{ 6233 match(Set dst src); 6234 ins_cost(125); 6235 6236 format %{ "MOV $dst,$src" %} 6237 opcode(0x8B); 6238 ins_encode( OpcP, RegMem(dst,src)); 6239 ins_pipe( ialu_reg_mem ); 6240 %} 6241 6242 instruct loadSSL(eRegL dst, stackSlotL src) %{ 6243 match(Set dst src); 6244 6245 ins_cost(200); 6246 format %{ "MOV $dst,$src.lo\n\t" 6247 "MOV $dst+4,$src.hi" %} 6248 opcode(0x8B, 0x8B); 6249 ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) ); 6250 ins_pipe( ialu_mem_long_reg ); 6251 %} 6252 6253 // Load Stack Slot 6254 instruct loadSSP(eRegP dst, stackSlotP src) %{ 6255 match(Set dst src); 6256 ins_cost(125); 6257 6258 format %{ "MOV $dst,$src" %} 6259 opcode(0x8B); 6260 ins_encode( OpcP, RegMem(dst,src)); 6261 ins_pipe( ialu_reg_mem ); 6262 %} 6263 6264 // Load Stack Slot 6265 instruct loadSSF(regFPR dst, stackSlotF src) %{ 6266 match(Set dst src); 6267 ins_cost(125); 6268 6269 format %{ "FLD_S $src\n\t" 6270 "FSTP $dst" %} 6271 opcode(0xD9); /* D9 /0, FLD m32real */ 6272 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6273 Pop_Reg_FPR(dst) ); 6274 ins_pipe( fpu_reg_mem ); 6275 %} 6276 6277 // Load Stack Slot 6278 instruct loadSSD(regDPR dst, stackSlotD src) %{ 6279 match(Set dst src); 6280 ins_cost(125); 6281 6282 format %{ "FLD_D $src\n\t" 6283 
"FSTP $dst" %} 6284 opcode(0xDD); /* DD /0, FLD m64real */ 6285 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6286 Pop_Reg_DPR(dst) ); 6287 ins_pipe( fpu_reg_mem ); 6288 %} 6289 6290 // Prefetch instructions for allocation. 6291 // Must be safe to execute with invalid address (cannot fault). 6292 6293 instruct prefetchAlloc0( memory mem ) %{ 6294 predicate(UseSSE==0 && AllocatePrefetchInstr!=3); 6295 match(PrefetchAllocation mem); 6296 ins_cost(0); 6297 size(0); 6298 format %{ "Prefetch allocation (non-SSE is empty encoding)" %} 6299 ins_encode(); 6300 ins_pipe(empty); 6301 %} 6302 6303 instruct prefetchAlloc( memory mem ) %{ 6304 predicate(AllocatePrefetchInstr==3); 6305 match( PrefetchAllocation mem ); 6306 ins_cost(100); 6307 6308 format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %} 6309 ins_encode %{ 6310 __ prefetchw($mem$$Address); 6311 %} 6312 ins_pipe(ialu_mem); 6313 %} 6314 6315 instruct prefetchAllocNTA( memory mem ) %{ 6316 predicate(UseSSE>=1 && AllocatePrefetchInstr==0); 6317 match(PrefetchAllocation mem); 6318 ins_cost(100); 6319 6320 format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %} 6321 ins_encode %{ 6322 __ prefetchnta($mem$$Address); 6323 %} 6324 ins_pipe(ialu_mem); 6325 %} 6326 6327 instruct prefetchAllocT0( memory mem ) %{ 6328 predicate(UseSSE>=1 && AllocatePrefetchInstr==1); 6329 match(PrefetchAllocation mem); 6330 ins_cost(100); 6331 6332 format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %} 6333 ins_encode %{ 6334 __ prefetcht0($mem$$Address); 6335 %} 6336 ins_pipe(ialu_mem); 6337 %} 6338 6339 instruct prefetchAllocT2( memory mem ) %{ 6340 predicate(UseSSE>=1 && AllocatePrefetchInstr==2); 6341 match(PrefetchAllocation mem); 6342 ins_cost(100); 6343 6344 format %{ "PREFETCHT2 $mem\t! 
Prefetch allocation into L2 cache for write" %} 6345 ins_encode %{ 6346 __ prefetcht2($mem$$Address); 6347 %} 6348 ins_pipe(ialu_mem); 6349 %} 6350 6351 //----------Store Instructions------------------------------------------------- 6352 6353 // Store Byte 6354 instruct storeB(memory mem, xRegI src) %{ 6355 match(Set mem (StoreB mem src)); 6356 6357 ins_cost(125); 6358 format %{ "MOV8 $mem,$src" %} 6359 opcode(0x88); 6360 ins_encode( OpcP, RegMem( src, mem ) ); 6361 ins_pipe( ialu_mem_reg ); 6362 %} 6363 6364 // Store Char/Short 6365 instruct storeC(memory mem, rRegI src) %{ 6366 match(Set mem (StoreC mem src)); 6367 6368 ins_cost(125); 6369 format %{ "MOV16 $mem,$src" %} 6370 opcode(0x89, 0x66); 6371 ins_encode( OpcS, OpcP, RegMem( src, mem ) ); 6372 ins_pipe( ialu_mem_reg ); 6373 %} 6374 6375 // Store Integer 6376 instruct storeI(memory mem, rRegI src) %{ 6377 match(Set mem (StoreI mem src)); 6378 6379 ins_cost(125); 6380 format %{ "MOV $mem,$src" %} 6381 opcode(0x89); 6382 ins_encode( OpcP, RegMem( src, mem ) ); 6383 ins_pipe( ialu_mem_reg ); 6384 %} 6385 6386 // Store Long 6387 instruct storeL(long_memory mem, eRegL src) %{ 6388 predicate(!((StoreLNode*)n)->require_atomic_access()); 6389 match(Set mem (StoreL mem src)); 6390 6391 ins_cost(200); 6392 format %{ "MOV $mem,$src.lo\n\t" 6393 "MOV $mem+4,$src.hi" %} 6394 opcode(0x89, 0x89); 6395 ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) ); 6396 ins_pipe( ialu_mem_long_reg ); 6397 %} 6398 6399 // Store Long to Integer 6400 instruct storeL2I(memory mem, eRegL src) %{ 6401 match(Set mem (StoreI mem (ConvL2I src))); 6402 6403 format %{ "MOV $mem,$src.lo\t# long -> int" %} 6404 ins_encode %{ 6405 __ movl($mem$$Address, $src$$Register); 6406 %} 6407 ins_pipe(ialu_mem_reg); 6408 %} 6409 6410 // Volatile Store Long. Must be atomic, so move it into 6411 // the FP TOS and then do a 64-bit FIST. 
Has to probe the 6412 // target address before the store (for null-ptr checks) 6413 // so the memory operand is used twice in the encoding. 6414 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{ 6415 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access()); 6416 match(Set mem (StoreL mem src)); 6417 effect( KILL cr ); 6418 ins_cost(400); 6419 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6420 "FILD $src\n\t" 6421 "FISTp $mem\t # 64-bit atomic volatile long store" %} 6422 opcode(0x3B); 6423 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); 6424 ins_pipe( fpu_reg_mem ); 6425 %} 6426 6427 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ 6428 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6429 match(Set mem (StoreL mem src)); 6430 effect( TEMP tmp, KILL cr ); 6431 ins_cost(380); 6432 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6433 "MOVSD $tmp,$src\n\t" 6434 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6435 ins_encode %{ 6436 __ cmpl(rax, $mem$$Address); 6437 __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp)); 6438 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6439 %} 6440 ins_pipe( pipe_slow ); 6441 %} 6442 6443 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ 6444 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6445 match(Set mem (StoreL mem src)); 6446 effect( TEMP tmp2 , TEMP tmp, KILL cr ); 6447 ins_cost(360); 6448 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6449 "MOVD $tmp,$src.lo\n\t" 6450 "MOVD $tmp2,$src.hi\n\t" 6451 "PUNPCKLDQ $tmp,$tmp2\n\t" 6452 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6453 ins_encode %{ 6454 __ cmpl(rax, $mem$$Address); 6455 __ movdl($tmp$$XMMRegister, $src$$Register); 6456 __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 6457 __ punpckldq($tmp$$XMMRegister, 
$tmp2$$XMMRegister); 6458 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6459 %} 6460 ins_pipe( pipe_slow ); 6461 %} 6462 6463 // Store Pointer; for storing unknown oops and raw pointers 6464 instruct storeP(memory mem, anyRegP src) %{ 6465 match(Set mem (StoreP mem src)); 6466 6467 ins_cost(125); 6468 format %{ "MOV $mem,$src" %} 6469 opcode(0x89); 6470 ins_encode( OpcP, RegMem( src, mem ) ); 6471 ins_pipe( ialu_mem_reg ); 6472 %} 6473 6474 // Store Integer Immediate 6475 instruct storeImmI(memory mem, immI src) %{ 6476 match(Set mem (StoreI mem src)); 6477 6478 ins_cost(150); 6479 format %{ "MOV $mem,$src" %} 6480 opcode(0xC7); /* C7 /0 */ 6481 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6482 ins_pipe( ialu_mem_imm ); 6483 %} 6484 6485 // Store Short/Char Immediate 6486 instruct storeImmI16(memory mem, immI16 src) %{ 6487 predicate(UseStoreImmI16); 6488 match(Set mem (StoreC mem src)); 6489 6490 ins_cost(150); 6491 format %{ "MOV16 $mem,$src" %} 6492 opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */ 6493 ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src )); 6494 ins_pipe( ialu_mem_imm ); 6495 %} 6496 6497 // Store Pointer Immediate; null pointers or constant oops that do not 6498 // need card-mark barriers. 
6499 instruct storeImmP(memory mem, immP src) %{ 6500 match(Set mem (StoreP mem src)); 6501 6502 ins_cost(150); 6503 format %{ "MOV $mem,$src" %} 6504 opcode(0xC7); /* C7 /0 */ 6505 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6506 ins_pipe( ialu_mem_imm ); 6507 %} 6508 6509 // Store Byte Immediate 6510 instruct storeImmB(memory mem, immI8 src) %{ 6511 match(Set mem (StoreB mem src)); 6512 6513 ins_cost(150); 6514 format %{ "MOV8 $mem,$src" %} 6515 opcode(0xC6); /* C6 /0 */ 6516 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 6517 ins_pipe( ialu_mem_imm ); 6518 %} 6519 6520 // Store CMS card-mark Immediate 6521 instruct storeImmCM(memory mem, immI8 src) %{ 6522 match(Set mem (StoreCM mem src)); 6523 6524 ins_cost(150); 6525 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %} 6526 opcode(0xC6); /* C6 /0 */ 6527 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 6528 ins_pipe( ialu_mem_imm ); 6529 %} 6530 6531 // Store Double 6532 instruct storeDPR( memory mem, regDPR1 src) %{ 6533 predicate(UseSSE<=1); 6534 match(Set mem (StoreD mem src)); 6535 6536 ins_cost(100); 6537 format %{ "FST_D $mem,$src" %} 6538 opcode(0xDD); /* DD /2 */ 6539 ins_encode( enc_FPR_store(mem,src) ); 6540 ins_pipe( fpu_mem_reg ); 6541 %} 6542 6543 // Store double does rounding on x86 6544 instruct storeDPR_rounded( memory mem, regDPR1 src) %{ 6545 predicate(UseSSE<=1); 6546 match(Set mem (StoreD mem (RoundDouble src))); 6547 6548 ins_cost(100); 6549 format %{ "FST_D $mem,$src\t# round" %} 6550 opcode(0xDD); /* DD /2 */ 6551 ins_encode( enc_FPR_store(mem,src) ); 6552 ins_pipe( fpu_mem_reg ); 6553 %} 6554 6555 // Store XMM register to memory (double-precision floating points) 6556 // MOVSD instruction 6557 instruct storeD(memory mem, regD src) %{ 6558 predicate(UseSSE>=2); 6559 match(Set mem (StoreD mem src)); 6560 ins_cost(95); 6561 format %{ "MOVSD $mem,$src" %} 6562 ins_encode %{ 6563 __ movdbl($mem$$Address, $src$$XMMRegister); 6564 %} 6565 ins_pipe( pipe_slow ); 6566 
%} 6567 6568 // Load Double 6569 instruct MoveD2VL(vlRegD dst, regD src) %{ 6570 match(Set dst src); 6571 format %{ "movsd $dst,$src\t! load double (8 bytes)" %} 6572 ins_encode %{ 6573 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6574 %} 6575 ins_pipe( fpu_reg_reg ); 6576 %} 6577 6578 // Load Double 6579 instruct MoveVL2D(regD dst, vlRegD src) %{ 6580 match(Set dst src); 6581 format %{ "movsd $dst,$src\t! load double (8 bytes)" %} 6582 ins_encode %{ 6583 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6584 %} 6585 ins_pipe( fpu_reg_reg ); 6586 %} 6587 6588 // Store XMM register to memory (single-precision floating point) 6589 // MOVSS instruction 6590 instruct storeF(memory mem, regF src) %{ 6591 predicate(UseSSE>=1); 6592 match(Set mem (StoreF mem src)); 6593 ins_cost(95); 6594 format %{ "MOVSS $mem,$src" %} 6595 ins_encode %{ 6596 __ movflt($mem$$Address, $src$$XMMRegister); 6597 %} 6598 ins_pipe( pipe_slow ); 6599 %} 6600 6601 // Load Float 6602 instruct MoveF2VL(vlRegF dst, regF src) %{ 6603 match(Set dst src); 6604 format %{ "movss $dst,$src\t! load float (4 bytes)" %} 6605 ins_encode %{ 6606 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6607 %} 6608 ins_pipe( fpu_reg_reg ); 6609 %} 6610 6611 // Load Float 6612 instruct MoveVL2F(regF dst, vlRegF src) %{ 6613 match(Set dst src); 6614 format %{ "movss $dst,$src\t! 
load float (4 bytes)" %} 6615 ins_encode %{ 6616 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6617 %} 6618 ins_pipe( fpu_reg_reg ); 6619 %} 6620 6621 // Store Float 6622 instruct storeFPR( memory mem, regFPR1 src) %{ 6623 predicate(UseSSE==0); 6624 match(Set mem (StoreF mem src)); 6625 6626 ins_cost(100); 6627 format %{ "FST_S $mem,$src" %} 6628 opcode(0xD9); /* D9 /2 */ 6629 ins_encode( enc_FPR_store(mem,src) ); 6630 ins_pipe( fpu_mem_reg ); 6631 %} 6632 6633 // Store Float does rounding on x86 6634 instruct storeFPR_rounded( memory mem, regFPR1 src) %{ 6635 predicate(UseSSE==0); 6636 match(Set mem (StoreF mem (RoundFloat src))); 6637 6638 ins_cost(100); 6639 format %{ "FST_S $mem,$src\t# round" %} 6640 opcode(0xD9); /* D9 /2 */ 6641 ins_encode( enc_FPR_store(mem,src) ); 6642 ins_pipe( fpu_mem_reg ); 6643 %} 6644 6645 // Store Float does rounding on x86 6646 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{ 6647 predicate(UseSSE<=1); 6648 match(Set mem (StoreF mem (ConvD2F src))); 6649 6650 ins_cost(100); 6651 format %{ "FST_S $mem,$src\t# D-round" %} 6652 opcode(0xD9); /* D9 /2 */ 6653 ins_encode( enc_FPR_store(mem,src) ); 6654 ins_pipe( fpu_mem_reg ); 6655 %} 6656 6657 // Store immediate Float value (it is faster than store from FPU register) 6658 // The instruction usage is guarded by predicate in operand immFPR(). 6659 instruct storeFPR_imm( memory mem, immFPR src) %{ 6660 match(Set mem (StoreF mem src)); 6661 6662 ins_cost(50); 6663 format %{ "MOV $mem,$src\t# store float" %} 6664 opcode(0xC7); /* C7 /0 */ 6665 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src )); 6666 ins_pipe( ialu_mem_imm ); 6667 %} 6668 6669 // Store immediate Float value (it is faster than store from XMM register) 6670 // The instruction usage is guarded by predicate in operand immF(). 
6671 instruct storeF_imm( memory mem, immF src) %{ 6672 match(Set mem (StoreF mem src)); 6673 6674 ins_cost(50); 6675 format %{ "MOV $mem,$src\t# store float" %} 6676 opcode(0xC7); /* C7 /0 */ 6677 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src )); 6678 ins_pipe( ialu_mem_imm ); 6679 %} 6680 6681 // Store Integer to stack slot 6682 instruct storeSSI(stackSlotI dst, rRegI src) %{ 6683 match(Set dst src); 6684 6685 ins_cost(100); 6686 format %{ "MOV $dst,$src" %} 6687 opcode(0x89); 6688 ins_encode( OpcPRegSS( dst, src ) ); 6689 ins_pipe( ialu_mem_reg ); 6690 %} 6691 6692 // Store Integer to stack slot 6693 instruct storeSSP(stackSlotP dst, eRegP src) %{ 6694 match(Set dst src); 6695 6696 ins_cost(100); 6697 format %{ "MOV $dst,$src" %} 6698 opcode(0x89); 6699 ins_encode( OpcPRegSS( dst, src ) ); 6700 ins_pipe( ialu_mem_reg ); 6701 %} 6702 6703 // Store Long to stack slot 6704 instruct storeSSL(stackSlotL dst, eRegL src) %{ 6705 match(Set dst src); 6706 6707 ins_cost(200); 6708 format %{ "MOV $dst,$src.lo\n\t" 6709 "MOV $dst+4,$src.hi" %} 6710 opcode(0x89, 0x89); 6711 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 6712 ins_pipe( ialu_mem_long_reg ); 6713 %} 6714 6715 //----------MemBar Instructions----------------------------------------------- 6716 // Memory barrier flavors 6717 6718 instruct membar_acquire() %{ 6719 match(MemBarAcquire); 6720 match(LoadFence); 6721 ins_cost(400); 6722 6723 size(0); 6724 format %{ "MEMBAR-acquire ! (empty encoding)" %} 6725 ins_encode(); 6726 ins_pipe(empty); 6727 %} 6728 6729 instruct membar_acquire_lock() %{ 6730 match(MemBarAcquireLock); 6731 ins_cost(0); 6732 6733 size(0); 6734 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %} 6735 ins_encode( ); 6736 ins_pipe(empty); 6737 %} 6738 6739 instruct membar_release() %{ 6740 match(MemBarRelease); 6741 match(StoreFence); 6742 ins_cost(400); 6743 6744 size(0); 6745 format %{ "MEMBAR-release ! 
(empty encoding)" %} 6746 ins_encode( ); 6747 ins_pipe(empty); 6748 %} 6749 6750 instruct membar_release_lock() %{ 6751 match(MemBarReleaseLock); 6752 ins_cost(0); 6753 6754 size(0); 6755 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %} 6756 ins_encode( ); 6757 ins_pipe(empty); 6758 %} 6759 6760 instruct membar_volatile(eFlagsReg cr) %{ 6761 match(MemBarVolatile); 6762 effect(KILL cr); 6763 ins_cost(400); 6764 6765 format %{ 6766 $$template 6767 $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile" 6768 %} 6769 ins_encode %{ 6770 __ membar(Assembler::StoreLoad); 6771 %} 6772 ins_pipe(pipe_slow); 6773 %} 6774 6775 instruct unnecessary_membar_volatile() %{ 6776 match(MemBarVolatile); 6777 predicate(Matcher::post_store_load_barrier(n)); 6778 ins_cost(0); 6779 6780 size(0); 6781 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} 6782 ins_encode( ); 6783 ins_pipe(empty); 6784 %} 6785 6786 instruct membar_storestore() %{ 6787 match(MemBarStoreStore); 6788 ins_cost(0); 6789 6790 size(0); 6791 format %{ "MEMBAR-storestore (empty encoding)" %} 6792 ins_encode( ); 6793 ins_pipe(empty); 6794 %} 6795 6796 //----------Move Instructions-------------------------------------------------- 6797 instruct castX2P(eAXRegP dst, eAXRegI src) %{ 6798 match(Set dst (CastX2P src)); 6799 format %{ "# X2P $dst, $src" %} 6800 ins_encode( /*empty encoding*/ ); 6801 ins_cost(0); 6802 ins_pipe(empty); 6803 %} 6804 6805 instruct castP2X(rRegI dst, eRegP src ) %{ 6806 match(Set dst (CastP2X src)); 6807 ins_cost(50); 6808 format %{ "MOV $dst, $src\t# CastP2X" %} 6809 ins_encode( enc_Copy( dst, src) ); 6810 ins_pipe( ialu_reg_reg ); 6811 %} 6812 6813 //----------Conditional Move--------------------------------------------------- 6814 // Conditional move 6815 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ 6816 predicate(!VM_Version::supports_cmov() ); 6817 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6818 ins_cost(200); 6819 format %{ 
"J$cop,us skip\t# signed cmove\n\t" 6820 "MOV $dst,$src\n" 6821 "skip:" %} 6822 ins_encode %{ 6823 Label Lskip; 6824 // Invert sense of branch from sense of CMOV 6825 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6826 __ movl($dst$$Register, $src$$Register); 6827 __ bind(Lskip); 6828 %} 6829 ins_pipe( pipe_cmov_reg ); 6830 %} 6831 6832 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ 6833 predicate(!VM_Version::supports_cmov() ); 6834 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6835 ins_cost(200); 6836 format %{ "J$cop,us skip\t# unsigned cmove\n\t" 6837 "MOV $dst,$src\n" 6838 "skip:" %} 6839 ins_encode %{ 6840 Label Lskip; 6841 // Invert sense of branch from sense of CMOV 6842 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6843 __ movl($dst$$Register, $src$$Register); 6844 __ bind(Lskip); 6845 %} 6846 ins_pipe( pipe_cmov_reg ); 6847 %} 6848 6849 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ 6850 predicate(VM_Version::supports_cmov() ); 6851 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6852 ins_cost(200); 6853 format %{ "CMOV$cop $dst,$src" %} 6854 opcode(0x0F,0x40); 6855 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6856 ins_pipe( pipe_cmov_reg ); 6857 %} 6858 6859 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ 6860 predicate(VM_Version::supports_cmov() ); 6861 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6862 ins_cost(200); 6863 format %{ "CMOV$cop $dst,$src" %} 6864 opcode(0x0F,0x40); 6865 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6866 ins_pipe( pipe_cmov_reg ); 6867 %} 6868 6869 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ 6870 predicate(VM_Version::supports_cmov() ); 6871 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6872 ins_cost(200); 6873 expand %{ 6874 cmovI_regU(cop, cr, dst, src); 6875 %} 6876 %} 6877 6878 // Conditional move 6879 instruct cmovI_mem(cmpOp cop, 
eFlagsReg cr, rRegI dst, memory src) %{ 6880 predicate(VM_Version::supports_cmov() ); 6881 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6882 ins_cost(250); 6883 format %{ "CMOV$cop $dst,$src" %} 6884 opcode(0x0F,0x40); 6885 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6886 ins_pipe( pipe_cmov_mem ); 6887 %} 6888 6889 // Conditional move 6890 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ 6891 predicate(VM_Version::supports_cmov() ); 6892 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6893 ins_cost(250); 6894 format %{ "CMOV$cop $dst,$src" %} 6895 opcode(0x0F,0x40); 6896 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6897 ins_pipe( pipe_cmov_mem ); 6898 %} 6899 6900 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ 6901 predicate(VM_Version::supports_cmov() ); 6902 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6903 ins_cost(250); 6904 expand %{ 6905 cmovI_memU(cop, cr, dst, src); 6906 %} 6907 %} 6908 6909 // Conditional move 6910 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6911 predicate(VM_Version::supports_cmov() ); 6912 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6913 ins_cost(200); 6914 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6915 opcode(0x0F,0x40); 6916 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6917 ins_pipe( pipe_cmov_reg ); 6918 %} 6919 6920 // Conditional move (non-P6 version) 6921 // Note: a CMoveP is generated for stubs and native wrappers 6922 // regardless of whether we are on a P6, so we 6923 // emulate a cmov here 6924 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6925 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6926 ins_cost(300); 6927 format %{ "Jn$cop skip\n\t" 6928 "MOV $dst,$src\t# pointer\n" 6929 "skip:" %} 6930 opcode(0x8b); 6931 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); 6932 ins_pipe( pipe_cmov_reg ); 6933 %} 6934 
6935 // Conditional move 6936 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ 6937 predicate(VM_Version::supports_cmov() ); 6938 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6939 ins_cost(200); 6940 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6941 opcode(0x0F,0x40); 6942 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6943 ins_pipe( pipe_cmov_reg ); 6944 %} 6945 6946 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ 6947 predicate(VM_Version::supports_cmov() ); 6948 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6949 ins_cost(200); 6950 expand %{ 6951 cmovP_regU(cop, cr, dst, src); 6952 %} 6953 %} 6954 6955 // DISABLED: Requires the ADLC to emit a bottom_type call that 6956 // correctly meets the two pointer arguments; one is an incoming 6957 // register but the other is a memory operand. ALSO appears to 6958 // be buggy with implicit null checks. 6959 // 6960 //// Conditional move 6961 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{ 6962 // predicate(VM_Version::supports_cmov() ); 6963 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6964 // ins_cost(250); 6965 // format %{ "CMOV$cop $dst,$src\t# ptr" %} 6966 // opcode(0x0F,0x40); 6967 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6968 // ins_pipe( pipe_cmov_mem ); 6969 //%} 6970 // 6971 //// Conditional move 6972 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{ 6973 // predicate(VM_Version::supports_cmov() ); 6974 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6975 // ins_cost(250); 6976 // format %{ "CMOV$cop $dst,$src\t# ptr" %} 6977 // opcode(0x0F,0x40); 6978 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6979 // ins_pipe( pipe_cmov_mem ); 6980 //%} 6981 6982 // Conditional move 6983 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ 6984 predicate(UseSSE<=1); 6985 match(Set dst (CMoveD (Binary cop cr) 
(Binary dst src))); 6986 ins_cost(200); 6987 format %{ "FCMOV$cop $dst,$src\t# double" %} 6988 opcode(0xDA); 6989 ins_encode( enc_cmov_dpr(cop,src) ); 6990 ins_pipe( pipe_cmovDPR_reg ); 6991 %} 6992 6993 // Conditional move 6994 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ 6995 predicate(UseSSE==0); 6996 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6997 ins_cost(200); 6998 format %{ "FCMOV$cop $dst,$src\t# float" %} 6999 opcode(0xDA); 7000 ins_encode( enc_cmov_dpr(cop,src) ); 7001 ins_pipe( pipe_cmovDPR_reg ); 7002 %} 7003 7004 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 7005 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ 7006 predicate(UseSSE<=1); 7007 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7008 ins_cost(200); 7009 format %{ "Jn$cop skip\n\t" 7010 "MOV $dst,$src\t# double\n" 7011 "skip:" %} 7012 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 7013 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); 7014 ins_pipe( pipe_cmovDPR_reg ); 7015 %} 7016 7017 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2: branch around a MOVSS instead.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2: branch around a MOVSD instead.
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // Fixed: this moves a double (MOVSD), so annotate "# double", matching
  // the x87 sibling fcmovDPR_regS (was mislabeled "# float").
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// Unordered-compare (CF) variant: expands to the unsigned rule above.
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // Fixed: annotate as "# double" (MOVSD moves a double; was "# float").
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// Unordered-compare (CF) variant: expands to the unsigned rule above.
instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Conditional move of a long: one CMOV per 32-bit half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Integer addition of an immediate.
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add +1: single-byte INC, guarded by UseIncDec.
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA; does not touch flags (no KILL cr).
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer add via LEA; does not touch flags.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add -1: single-byte DEC, guarded by UseIncDec.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Pointer plus register offset.
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer plus immediate offset.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add with a memory source operand.
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write add of a register into memory.
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Increment memory in place.
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Decrement memory in place.
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// CheckCastPP is a no-op at machine level: zero size, empty encoding.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastPP is likewise a no-op at machine level.
instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastII is a no-op at machine level.
instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}


// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// CAS of a long (CMPXCHG8B); res is set to 1 on success, 0 on failure
// via the flags-to-boolean encoding.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CAS of a pointer; boolean result in res.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CAS of a byte; boolean result in res.
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CAS of a short; boolean result in res.
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CAS of an int; boolean result in res.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange: unlike CompareAndSwap, the witnessed value is
// returned in oldval rather than a boolean.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddB whose result is unused: a locked ADD suffices (no XADD).
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
7504 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{ 7505 match(Set newval (GetAndAddB mem newval)); 7506 effect(KILL cr); 7507 format %{ "XADDB [$mem],$newval" %} 7508 ins_encode %{ 7509 __ lock(); 7510 __ xaddb($mem$$Address, $newval$$Register); 7511 %} 7512 ins_pipe( pipe_cmpxchg ); 7513 %} 7514 7515 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7516 predicate(n->as_LoadStore()->result_not_used()); 7517 match(Set dummy (GetAndAddS mem add)); 7518 effect(KILL cr); 7519 format %{ "ADDS [$mem],$add" %} 7520 ins_encode %{ 7521 __ lock(); 7522 __ addw($mem$$Address, $add$$constant); 7523 %} 7524 ins_pipe( pipe_cmpxchg ); 7525 %} 7526 7527 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{ 7528 match(Set newval (GetAndAddS mem newval)); 7529 effect(KILL cr); 7530 format %{ "XADDS [$mem],$newval" %} 7531 ins_encode %{ 7532 __ lock(); 7533 __ xaddw($mem$$Address, $newval$$Register); 7534 %} 7535 ins_pipe( pipe_cmpxchg ); 7536 %} 7537 7538 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7539 predicate(n->as_LoadStore()->result_not_used()); 7540 match(Set dummy (GetAndAddI mem add)); 7541 effect(KILL cr); 7542 format %{ "ADDL [$mem],$add" %} 7543 ins_encode %{ 7544 __ lock(); 7545 __ addl($mem$$Address, $add$$constant); 7546 %} 7547 ins_pipe( pipe_cmpxchg ); 7548 %} 7549 7550 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7551 match(Set newval (GetAndAddI mem newval)); 7552 effect(KILL cr); 7553 format %{ "XADDL [$mem],$newval" %} 7554 ins_encode %{ 7555 __ lock(); 7556 __ xaddl($mem$$Address, $newval$$Register); 7557 %} 7558 ins_pipe( pipe_cmpxchg ); 7559 %} 7560 7561 // Important to match to xRegI: only 8-bit regs. 
// Atomic get-and-set of a byte. Note: no flags effect and no explicit
// lock prefix needed (XCHG with a memory operand locks implicitly).
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic get-and-set of a short.
instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic get-and-set of an int.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic get-and-set of a pointer.
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract an immediate.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract a memory operand.
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write subtract of a register from memory.
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negate: matched from (SubI 0 dst).
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low word of EDX:EAX only; feeds the
// multiply-high rules below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
//  (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Predicate checks that the multiplier constant actually fits in 32 bits.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Predicate checks that the multiplier constant actually fits in 32 bits.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// (dst * src1) + (src2 * src3): expands to two multiplies and an add.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
// Basic idea: lo(result) = lo(src * EAX)
//             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// Special-cases EAX==0x80000000 / ECX==-1 to avoid the #DE overflow trap
// on min_jint / -1 (result is min_jint, remainder 0).
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
// Inline long division by a 32-bit constant using two unsigned 32-bit DIVs.
// The asserted precondition (divisor not 0, -1, or min_jint) means no
// overflow special case is required.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con; // work with |divisor|; sign fixed at the end
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      // negative constant divisor: negate the 64-bit quotient
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
// Same two-DIV structure as divL_eReg_imm32, but keeps the remainder
// (left in EDX by DIV) and sign-extends it into $dst.hi at the end.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con; // remainder sign follows the dividend only
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one, directly on a memory operand
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm ); // NOTE(review): register form scheduled as ialu_mem_imm -- confirm intended
%}

// Arithmetic Shift Right by 8-bit immediate, directly on a memory operand
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Shift Left by 24 followed by Arithmetic Shift Right by 24, i.e.
// sign-extension of the low byte, emitted as a single MOVSX.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by 16 followed by Arithmetic Shift Right by 16, i.e.
// sign-extension of the low 16 bits, emitted as a single MOVSX.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
// Read-modify-write form: load, AND, store back to the same address.
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst
(StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2, matched from (AndI (XorI src1 -1) src2).
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from (AndI (SubI 0 src) src).
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to and including the lowest set bit,
// matched from (XorI (AddI src -1) src).
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: clear the lowest set bit, matched from (AndI (AddI src -1) src).
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst
(OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer viewed as raw bits (CastP2X).
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
// Read-modify-write form: load, OR, store back to the same address.
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// Helper instructs with no match rule: they are only instantiated via the
// 'expand' clauses of the ROL match rules below.
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate: the or-of-shifts is only a rotate when the two shift
// counts sum to 0 mod 32.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI
dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
// Helper instructs with no match rule: only instantiated via the
// 'expand' clauses of the ROR match rules below.
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Predicate: only a rotate when the two shift counts sum to 0 mod 32.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32,
eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// XOR with -1 is bitwise NOT; NOT leaves EFLAGS untouched, hence no
// eFlagsReg operand or KILL effect here.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
// Read-modify-write form: load, XOR, store back to the same address.
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI
dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Plain register copy; helper used only via the convI2B expand below.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG sets CF iff the operand was non-zero; ADC then computes
// -src + src + CF, leaving 1 for non-zero src and 0 for zero src.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer flavor of the same expansion.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// Produce -1 in dst if p < q (signed), else 0.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    Label done; // NOTE(review): declared but never used in this encoding
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd); // low byte := (p < q)
    __ negl(Rd);                  // 0/1 -> 0/-1
  %}

  ins_pipe(pipe_slow);
%}

// Compare-less-than-zero mask: just smear the sign bit across the register.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// Fused (p-q) + conditional add of y, replacing the masked-add idiom
// with a short branch.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry); // only when p < q (mask was -1)
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// y &= (p < q ? -1 : 0): keep y when p < q, else clear it.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
   instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------

// These instructions produce a flags result consumed by a following
// overflow branch; the arithmetic result itself may be discarded.
instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2); // ADD destroys op1

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtraction overflow uses CMP, which only sets flags, so op1 survives.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// (0 - op2) overflow check encoded as NEG, which destroys op2.
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2); // two-operand IMUL destroys op1

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Three-operand IMUL writes into a temp, so both sources survive.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
// 64-bit add on 32-bit x86: ADD the low words, then ADC the high words
// so the carry propagates.
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  // SUB low words, then SBB high words so the borrow propagates.
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long (matched as 0 - dst)
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL
cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
// High half of the long operand is at $mem+4.
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// ANDN dst = ~src1 & src2: the matcher folds (XorL src1 -1), i.e. bitwise
// NOT, into a single ANDN per 32-bit half.  TEMP dst: dst is written
// before all inputs are consumed, so it must not alias src1/src2.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

// ANDN with the second operand in memory; builds an Address for the
// high word at disp+4 by hand.
instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst),
HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit (matched as (0 - src) & src).
// 64-bit decomposition: if BLSIL on the low word found a set bit
// (result non-zero), the answer's high word stays 0 and we skip; only
// when the low word is all-zero does the lowest set bit live in the
// high word.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// BLSI with the source in memory; high word read from disp+4.
instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to and including lowest set bit ((src - 1) ^ src).
// BLSMSK sets CF when the source is zero; on a zero low word the mask
// must extend into the high word, hence the JNC skip.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    // CF set => low word was zero, so the mask must cover the high word too.
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// BLSMSK with the source in memory; high word read from disp+4.
instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit ((src - 1) & src).
// The high word is copied through unchanged unless the low word was
// zero (CF set), in which case the borrow clears the lowest set bit of
// the high word instead.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// BLSR with the source in memory; high word read from disp+4.
instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL
(AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    // CF set => low word was zero; clear lowest set bit of the high word.
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
// Halves are independent for bitwise OR.
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);  // OR r,r/m for both halves
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
// High half of the long operand is at $mem+4.
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);  // XOR r,r/m for both halves
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// XOR with all-ones is bitwise NOT; NOT does not touch EFLAGS, so no
// KILL cr here, unlike the other XOR forms.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
// High half of the long operand is at $mem+4.
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// ADD/ADC of a register to itself is a 64-bit shift-left-by-one: the
// carry out of the low-word add feeds the high word.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  // Two ADD/ADC rounds: each round is a 64-bit shift-left-by-one.
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
// Three ADD/ADC rounds; beyond 3 the generic SHLD form below is used.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
// SHLD shifts bits from the low word into the high word, then SHL
// finishes the low word.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// Low word moves wholesale into the high word (shifted by cnt-32);
// the low word becomes zero.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
format %{ "MOV $dst.hi,$dst.lo\n" 9430 "\tSHL $dst.hi,$cnt-32\n" 9431 "\tXOR $dst.lo,$dst.lo" %} 9432 opcode(0xC1, 0x4); /* C1 /4 ib */ 9433 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9434 ins_pipe( ialu_reg_long ); 9435 %} 9436 9437 // Shift Left Long by variable 9438 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9439 match(Set dst (LShiftL dst shift)); 9440 effect(KILL cr); 9441 ins_cost(500+200); 9442 size(17); 9443 format %{ "TEST $shift,32\n\t" 9444 "JEQ,s small\n\t" 9445 "MOV $dst.hi,$dst.lo\n\t" 9446 "XOR $dst.lo,$dst.lo\n" 9447 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9448 "SHL $dst.lo,$shift" %} 9449 ins_encode( shift_left_long( dst, shift ) ); 9450 ins_pipe( pipe_slow ); 9451 %} 9452 9453 // Shift Right Long by 1-31 9454 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9455 match(Set dst (URShiftL dst cnt)); 9456 effect(KILL cr); 9457 ins_cost(200); 9458 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9459 "SHR $dst.hi,$cnt" %} 9460 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9461 ins_encode( move_long_small_shift(dst,cnt) ); 9462 ins_pipe( ialu_reg_long ); 9463 %} 9464 9465 // Shift Right Long by 32-63 9466 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9467 match(Set dst (URShiftL dst cnt)); 9468 effect(KILL cr); 9469 ins_cost(300); 9470 format %{ "MOV $dst.lo,$dst.hi\n" 9471 "\tSHR $dst.lo,$cnt-32\n" 9472 "\tXOR $dst.hi,$dst.hi" %} 9473 opcode(0xC1, 0x5); /* C1 /5 ib */ 9474 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9475 ins_pipe( ialu_reg_long ); 9476 %} 9477 9478 // Shift Right Long by variable 9479 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9480 match(Set dst (URShiftL dst shift)); 9481 effect(KILL cr); 9482 ins_cost(600); 9483 size(17); 9484 format %{ "TEST $shift,32\n\t" 9485 "JEQ,s small\n\t" 9486 "MOV $dst.lo,$dst.hi\n\t" 9487 "XOR $dst.hi,$dst.hi\n" 9488 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9489 "SHR $dst.hi,$shift" %} 9490 ins_encode( 
shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31 (arithmetic)
// Like the unsigned form, but SAR on the high word replicates the sign.
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63 (arithmetic)
// High word moves into low (shifted by cnt-32); SAR hi,31 fills the
// high word with sign bits.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable, count in ECX.
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}


//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
// FUCOMIP leaves PF set for unordered (NaN) compares; the fixup forces
// CF so NaN compares as "less than", matching Java semantics.
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
"SAHF\n" 9552 "exit:\tNOP // avoid branch to branch" %} 9553 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9554 ins_encode( Push_Reg_DPR(src1), 9555 OpcP, RegOpc(src2), 9556 cmpF_P6_fixup ); 9557 ins_pipe( pipe_slow ); 9558 %} 9559 9560 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9561 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9562 match(Set cr (CmpD src1 src2)); 9563 ins_cost(150); 9564 format %{ "FLD $src1\n\t" 9565 "FUCOMIP ST,$src2 // P6 instruction" %} 9566 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9567 ins_encode( Push_Reg_DPR(src1), 9568 OpcP, RegOpc(src2)); 9569 ins_pipe( pipe_slow ); 9570 %} 9571 9572 // Compare & branch 9573 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9574 predicate(UseSSE<=1); 9575 match(Set cr (CmpD src1 src2)); 9576 effect(KILL rax); 9577 ins_cost(200); 9578 format %{ "FLD $src1\n\t" 9579 "FCOMp $src2\n\t" 9580 "FNSTSW AX\n\t" 9581 "TEST AX,0x400\n\t" 9582 "JZ,s flags\n\t" 9583 "MOV AH,1\t# unordered treat as LT\n" 9584 "flags:\tSAHF" %} 9585 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9586 ins_encode( Push_Reg_DPR(src1), 9587 OpcP, RegOpc(src2), 9588 fpu_flags); 9589 ins_pipe( pipe_slow ); 9590 %} 9591 9592 // Compare vs zero into -1,0,1 9593 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9594 predicate(UseSSE<=1); 9595 match(Set dst (CmpD3 src1 zero)); 9596 effect(KILL cr, KILL rax); 9597 ins_cost(280); 9598 format %{ "FTSTD $dst,$src1" %} 9599 opcode(0xE4, 0xD9); 9600 ins_encode( Push_Reg_DPR(src1), 9601 OpcS, OpcP, PopFPU, 9602 CmpF_Result(dst)); 9603 ins_pipe( pipe_slow ); 9604 %} 9605 9606 // Compare into -1,0,1 9607 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9608 predicate(UseSSE<=1); 9609 match(Set dst (CmpD3 src1 src2)); 9610 effect(KILL cr, KILL rax); 9611 ins_cost(300); 9612 format %{ "FCMPD $dst,$src1,$src2" %} 9613 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9614 ins_encode( 
Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// UCOMISD sets PF on NaN; the fixup rewrites EFLAGS so unordered reads
// as "below", matching Java compare semantics.
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Cheaper form when the consumer tolerates the raw unordered flags.
instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand variant of cmpD_cc.
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of cmpD_ccCF (raw unordered flags).
instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);  // materializes -1/0/1 from EFLAGS
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// x87 double subtract: push src, then FSUBP into dst.
instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5);  /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Subtract with an explicit store-and-round through a stack slot
// (RoundDouble): forces the 80-bit x87 result down to 64 bits.
instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DSUB ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// x87 double subtract with memory operand.
instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD);  /* DE C0+i */ /* LoadD
DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// FABS operates in place on the top of the x87 stack (regDPR1 = ST0).
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);  // FABS = D9 E1
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// FCHS negates the top of the x87 stack in place.
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);  // FCHS = D9 E0
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// x87 double add: push src, FADDP into dst.
instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0);  /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Add with store-and-round through a stack slot (RoundDouble) to
// squeeze the 80-bit x87 result to 64 bits.
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0);  /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// x87 double add with memory operand.
instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD);  /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble
(AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}

// Add the constant 1.0: FLD1 is shorter and cheaper than loading from
// the constant table.
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// Add a general double constant from the constant table.  The predicate
// excludes 0.0 and 1.0, which have dedicated cheaper forms.
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Constant add with store-and-round through a stack slot.
instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}

// x87 double multiply: push src, FMULP into dst.
instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1);  /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe(
fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (MulD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double multiplies

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1);  /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Multiply by a double constant from the constant table (0.0 and 1.0
// excluded by the predicate; they have cheaper forms elsewhere).
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


// x87 double multiply with memory operand.
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD);  /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src,
memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9);  /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}


// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD);  /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
// Fused (src0 * src1) - src2, result written back to src2 via FSUBRP.
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


// x87 double divide: push src, FDIVP into dst.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7);  /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// Strict-FP x87 double divide: dst must be on top of the FPU stack (regDPR1)
// and src anywhere else (regnotDPR1).
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // ADLC allows exactly one predicate per instruct. A stray duplicate
  // "predicate (UseSSE<=1);" was removed: the strict-mode predicate below
  // already includes the UseSSE<=1 test.
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all strict FP double divides
               // (was octal "01" -- same value, written plainly for clarity,
               // consistent with strictfp_mulDPR_reg)

  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "FDIVp  $dst,ST\n\t"
            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),   // pre-scale dividend to avoid double rounding
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),     // FDIVp $dst,ST
              strictfp_bias2(dst) ); // rescale quotient back
  ins_pipe( fpu_reg_reg );
%}

// Non-strict double divide feeding a RoundDouble: divide and spill to a
// stack slot in one instruction.
instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD    $src1\n\t"
            "FDIV   ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// x87 double remainder (FPREM loop) for UseSSE<=1.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
10066 predicate(UseSSE>=2); 10067 match(Set dst (ModD src0 src1)); 10068 effect(KILL rax, KILL cr); 10069 10070 format %{ "SUB ESP,8\t # DMOD\n" 10071 "\tMOVSD [ESP+0],$src1\n" 10072 "\tFLD_D [ESP+0]\n" 10073 "\tMOVSD [ESP+0],$src0\n" 10074 "\tFLD_D [ESP+0]\n" 10075 "loop:\tFPREM\n" 10076 "\tFWAIT\n" 10077 "\tFNSTSW AX\n" 10078 "\tSAHF\n" 10079 "\tJP loop\n" 10080 "\tFSTP_D [ESP+0]\n" 10081 "\tMOVSD $dst,[ESP+0]\n" 10082 "\tADD ESP,8\n" 10083 "\tFSTP ST0\t # Restore FPU Stack" 10084 %} 10085 ins_cost(250); 10086 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 10087 ins_pipe( pipe_slow ); 10088 %} 10089 10090 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 10091 predicate (UseSSE<=1); 10092 match(Set dst(AtanD dst src)); 10093 format %{ "DATA $dst,$src" %} 10094 opcode(0xD9, 0xF3); 10095 ins_encode( Push_Reg_DPR(src), 10096 OpcP, OpcS, RegOpc(dst) ); 10097 ins_pipe( pipe_slow ); 10098 %} 10099 10100 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 10101 predicate (UseSSE>=2); 10102 match(Set dst(AtanD dst src)); 10103 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 10104 format %{ "DATA $dst,$src" %} 10105 opcode(0xD9, 0xF3); 10106 ins_encode( Push_SrcD(src), 10107 OpcP, OpcS, Push_ResultD(dst) ); 10108 ins_pipe( pipe_slow ); 10109 %} 10110 10111 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 10112 predicate (UseSSE<=1); 10113 match(Set dst (SqrtD src)); 10114 format %{ "DSQRT $dst,$src" %} 10115 opcode(0xFA, 0xD9); 10116 ins_encode( Push_Reg_DPR(src), 10117 OpcS, OpcP, Pop_Reg_DPR(dst) ); 10118 ins_pipe( pipe_slow ); 10119 %} 10120 10121 //-------------Float Instructions------------------------------- 10122 // Float Math 10123 10124 // Code for float compare: 10125 // fcompp(); 10126 // fwait(); fnstsw_ax(); 10127 // sahf(); 10128 // movl(dst, unordered_result); 10129 // jcc(Assembler::parity, exit); 10130 // movl(dst, less_result); 10131 // jcc(Assembler::below, exit); 10132 // movl(dst, equal_result); 
10133 // jcc(Assembler::equal, exit); 10134 // movl(dst, greater_result); 10135 // exit: 10136 10137 // P6 version of float compare, sets condition codes in EFLAGS 10138 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10139 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10140 match(Set cr (CmpF src1 src2)); 10141 effect(KILL rax); 10142 ins_cost(150); 10143 format %{ "FLD $src1\n\t" 10144 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10145 "JNP exit\n\t" 10146 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10147 "SAHF\n" 10148 "exit:\tNOP // avoid branch to branch" %} 10149 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10150 ins_encode( Push_Reg_DPR(src1), 10151 OpcP, RegOpc(src2), 10152 cmpF_P6_fixup ); 10153 ins_pipe( pipe_slow ); 10154 %} 10155 10156 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10157 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10158 match(Set cr (CmpF src1 src2)); 10159 ins_cost(100); 10160 format %{ "FLD $src1\n\t" 10161 "FUCOMIP ST,$src2 // P6 instruction" %} 10162 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10163 ins_encode( Push_Reg_DPR(src1), 10164 OpcP, RegOpc(src2)); 10165 ins_pipe( pipe_slow ); 10166 %} 10167 10168 10169 // Compare & branch 10170 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10171 predicate(UseSSE == 0); 10172 match(Set cr (CmpF src1 src2)); 10173 effect(KILL rax); 10174 ins_cost(200); 10175 format %{ "FLD $src1\n\t" 10176 "FCOMp $src2\n\t" 10177 "FNSTSW AX\n\t" 10178 "TEST AX,0x400\n\t" 10179 "JZ,s flags\n\t" 10180 "MOV AH,1\t# unordered treat as LT\n" 10181 "flags:\tSAHF" %} 10182 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10183 ins_encode( Push_Reg_DPR(src1), 10184 OpcP, RegOpc(src2), 10185 fpu_flags); 10186 ins_pipe( pipe_slow ); 10187 %} 10188 10189 // Compare vs zero into -1,0,1 10190 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10191 predicate(UseSSE == 0); 10192 match(Set dst 
(CmpF3 src1 zero)); 10193 effect(KILL cr, KILL rax); 10194 ins_cost(280); 10195 format %{ "FTSTF $dst,$src1" %} 10196 opcode(0xE4, 0xD9); 10197 ins_encode( Push_Reg_DPR(src1), 10198 OpcS, OpcP, PopFPU, 10199 CmpF_Result(dst)); 10200 ins_pipe( pipe_slow ); 10201 %} 10202 10203 // Compare into -1,0,1 10204 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10205 predicate(UseSSE == 0); 10206 match(Set dst (CmpF3 src1 src2)); 10207 effect(KILL cr, KILL rax); 10208 ins_cost(300); 10209 format %{ "FCMPF $dst,$src1,$src2" %} 10210 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10211 ins_encode( Push_Reg_DPR(src1), 10212 OpcP, RegOpc(src2), 10213 CmpF_Result(dst)); 10214 ins_pipe( pipe_slow ); 10215 %} 10216 10217 // float compare and set condition codes in EFLAGS by XMM regs 10218 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10219 predicate(UseSSE>=1); 10220 match(Set cr (CmpF src1 src2)); 10221 ins_cost(145); 10222 format %{ "UCOMISS $src1,$src2\n\t" 10223 "JNP,s exit\n\t" 10224 "PUSHF\t# saw NaN, set CF\n\t" 10225 "AND [rsp], #0xffffff2b\n\t" 10226 "POPF\n" 10227 "exit:" %} 10228 ins_encode %{ 10229 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10230 emit_cmpfp_fixup(_masm); 10231 %} 10232 ins_pipe( pipe_slow ); 10233 %} 10234 10235 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10236 predicate(UseSSE>=1); 10237 match(Set cr (CmpF src1 src2)); 10238 ins_cost(100); 10239 format %{ "UCOMISS $src1,$src2" %} 10240 ins_encode %{ 10241 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10242 %} 10243 ins_pipe( pipe_slow ); 10244 %} 10245 10246 // float compare and set condition codes in EFLAGS by XMM regs 10247 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10248 predicate(UseSSE>=1); 10249 match(Set cr (CmpF src1 (LoadF src2))); 10250 ins_cost(165); 10251 format %{ "UCOMISS $src1,$src2\n\t" 10252 "JNP,s exit\n\t" 10253 "PUSHF\t# saw NaN, set CF\n\t" 10254 "AND [rsp], #0xffffff2b\n\t" 10255 
"POPF\n" 10256 "exit:" %} 10257 ins_encode %{ 10258 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10259 emit_cmpfp_fixup(_masm); 10260 %} 10261 ins_pipe( pipe_slow ); 10262 %} 10263 10264 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10265 predicate(UseSSE>=1); 10266 match(Set cr (CmpF src1 (LoadF src2))); 10267 ins_cost(100); 10268 format %{ "UCOMISS $src1,$src2" %} 10269 ins_encode %{ 10270 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10271 %} 10272 ins_pipe( pipe_slow ); 10273 %} 10274 10275 // Compare into -1,0,1 in XMM 10276 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10277 predicate(UseSSE>=1); 10278 match(Set dst (CmpF3 src1 src2)); 10279 effect(KILL cr); 10280 ins_cost(255); 10281 format %{ "UCOMISS $src1, $src2\n\t" 10282 "MOV $dst, #-1\n\t" 10283 "JP,s done\n\t" 10284 "JB,s done\n\t" 10285 "SETNE $dst\n\t" 10286 "MOVZB $dst, $dst\n" 10287 "done:" %} 10288 ins_encode %{ 10289 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10290 emit_cmpfp3(_masm, $dst$$Register); 10291 %} 10292 ins_pipe( pipe_slow ); 10293 %} 10294 10295 // Compare into -1,0,1 in XMM and memory 10296 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10297 predicate(UseSSE>=1); 10298 match(Set dst (CmpF3 src1 (LoadF src2))); 10299 effect(KILL cr); 10300 ins_cost(275); 10301 format %{ "UCOMISS $src1, $src2\n\t" 10302 "MOV $dst, #-1\n\t" 10303 "JP,s done\n\t" 10304 "JB,s done\n\t" 10305 "SETNE $dst\n\t" 10306 "MOVZB $dst, $dst\n" 10307 "done:" %} 10308 ins_encode %{ 10309 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10310 emit_cmpfp3(_masm, $dst$$Register); 10311 %} 10312 ins_pipe( pipe_slow ); 10313 %} 10314 10315 // Spill to obtain 24-bit precision 10316 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10317 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10318 match(Set dst (SubF src1 src2)); 10319 10320 format %{ "FSUB $dst,$src1 - $src2" %} 10321 opcode(0xD8, 0x4); /* D8 
E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10322 ins_encode( Push_Reg_FPR(src1), 10323 OpcReg_FPR(src2), 10324 Pop_Mem_FPR(dst) ); 10325 ins_pipe( fpu_mem_reg_reg ); 10326 %} 10327 // 10328 // This instruction does not round to 24-bits 10329 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10330 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10331 match(Set dst (SubF dst src)); 10332 10333 format %{ "FSUB $dst,$src" %} 10334 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10335 ins_encode( Push_Reg_FPR(src), 10336 OpcP, RegOpc(dst) ); 10337 ins_pipe( fpu_reg_reg ); 10338 %} 10339 10340 // Spill to obtain 24-bit precision 10341 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10342 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10343 match(Set dst (AddF src1 src2)); 10344 10345 format %{ "FADD $dst,$src1,$src2" %} 10346 opcode(0xD8, 0x0); /* D8 C0+i */ 10347 ins_encode( Push_Reg_FPR(src2), 10348 OpcReg_FPR(src1), 10349 Pop_Mem_FPR(dst) ); 10350 ins_pipe( fpu_mem_reg_reg ); 10351 %} 10352 // 10353 // This instruction does not round to 24-bits 10354 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10355 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10356 match(Set dst (AddF dst src)); 10357 10358 format %{ "FLD $src\n\t" 10359 "FADDp $dst,ST" %} 10360 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10361 ins_encode( Push_Reg_FPR(src), 10362 OpcP, RegOpc(dst) ); 10363 ins_pipe( fpu_reg_reg ); 10364 %} 10365 10366 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10367 predicate(UseSSE==0); 10368 match(Set dst (AbsF src)); 10369 ins_cost(100); 10370 format %{ "FABS" %} 10371 opcode(0xE1, 0xD9); 10372 ins_encode( OpcS, OpcP ); 10373 ins_pipe( fpu_reg_reg ); 10374 %} 10375 10376 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10377 predicate(UseSSE==0); 10378 match(Set dst (NegF src)); 10379 ins_cost(100); 10380 format %{ "FCHS" %} 10381 opcode(0xE0, 0xD9); 10382 ins_encode( OpcS, OpcP ); 10383 ins_pipe( 
fpu_reg_reg ); 10384 %} 10385 10386 // Cisc-alternate to addFPR_reg 10387 // Spill to obtain 24-bit precision 10388 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10389 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10390 match(Set dst (AddF src1 (LoadF src2))); 10391 10392 format %{ "FLD $src2\n\t" 10393 "FADD ST,$src1\n\t" 10394 "FSTP_S $dst" %} 10395 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10396 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10397 OpcReg_FPR(src1), 10398 Pop_Mem_FPR(dst) ); 10399 ins_pipe( fpu_mem_reg_mem ); 10400 %} 10401 // 10402 // Cisc-alternate to addFPR_reg 10403 // This instruction does not round to 24-bits 10404 instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10405 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10406 match(Set dst (AddF dst (LoadF src))); 10407 10408 format %{ "FADD $dst,$src" %} 10409 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10410 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10411 OpcP, RegOpc(dst) ); 10412 ins_pipe( fpu_reg_mem ); 10413 %} 10414 10415 // // Following two instructions for _222_mpegaudio 10416 // Spill to obtain 24-bit precision 10417 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10418 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10419 match(Set dst (AddF src1 src2)); 10420 10421 format %{ "FADD $dst,$src1,$src2" %} 10422 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10423 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10424 OpcReg_FPR(src2), 10425 Pop_Mem_FPR(dst) ); 10426 ins_pipe( fpu_mem_reg_mem ); 10427 %} 10428 10429 // Cisc-spill variant 10430 // Spill to obtain 24-bit precision 10431 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10432 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10433 match(Set dst (AddF src1 (LoadF src2))); 10434 10435 format %{ "FADD $dst,$src1,$src2 
cisc" %} 10436 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10437 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10438 set_instruction_start, 10439 OpcP, RMopc_Mem(secondary,src1), 10440 Pop_Mem_FPR(dst) ); 10441 ins_pipe( fpu_mem_mem_mem ); 10442 %} 10443 10444 // Spill to obtain 24-bit precision 10445 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10446 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10447 match(Set dst (AddF src1 src2)); 10448 10449 format %{ "FADD $dst,$src1,$src2" %} 10450 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10451 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10452 set_instruction_start, 10453 OpcP, RMopc_Mem(secondary,src1), 10454 Pop_Mem_FPR(dst) ); 10455 ins_pipe( fpu_mem_mem_mem ); 10456 %} 10457 10458 10459 // Spill to obtain 24-bit precision 10460 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10461 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10462 match(Set dst (AddF src con)); 10463 format %{ "FLD $src\n\t" 10464 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10465 "FSTP_S $dst" %} 10466 ins_encode %{ 10467 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10468 __ fadd_s($constantaddress($con)); 10469 __ fstp_s(Address(rsp, $dst$$disp)); 10470 %} 10471 ins_pipe(fpu_mem_reg_con); 10472 %} 10473 // 10474 // This instruction does not round to 24-bits 10475 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10476 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10477 match(Set dst (AddF src con)); 10478 format %{ "FLD $src\n\t" 10479 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10480 "FSTP $dst" %} 10481 ins_encode %{ 10482 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10483 __ fadd_s($constantaddress($con)); 10484 __ fstp_d($dst$$reg); 10485 %} 10486 ins_pipe(fpu_reg_reg_con); 10487 %} 10488 10489 // Spill to obtain 24-bit precision 10490 
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10491 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10492 match(Set dst (MulF src1 src2)); 10493 10494 format %{ "FLD $src1\n\t" 10495 "FMUL $src2\n\t" 10496 "FSTP_S $dst" %} 10497 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10498 ins_encode( Push_Reg_FPR(src1), 10499 OpcReg_FPR(src2), 10500 Pop_Mem_FPR(dst) ); 10501 ins_pipe( fpu_mem_reg_reg ); 10502 %} 10503 // 10504 // This instruction does not round to 24-bits 10505 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10506 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10507 match(Set dst (MulF src1 src2)); 10508 10509 format %{ "FLD $src1\n\t" 10510 "FMUL $src2\n\t" 10511 "FSTP_S $dst" %} 10512 opcode(0xD8, 0x1); /* D8 C8+i */ 10513 ins_encode( Push_Reg_FPR(src2), 10514 OpcReg_FPR(src1), 10515 Pop_Reg_FPR(dst) ); 10516 ins_pipe( fpu_reg_reg_reg ); 10517 %} 10518 10519 10520 // Spill to obtain 24-bit precision 10521 // Cisc-alternate to reg-reg multiply 10522 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10523 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10524 match(Set dst (MulF src1 (LoadF src2))); 10525 10526 format %{ "FLD_S $src2\n\t" 10527 "FMUL $src1\n\t" 10528 "FSTP_S $dst" %} 10529 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10530 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10531 OpcReg_FPR(src1), 10532 Pop_Mem_FPR(dst) ); 10533 ins_pipe( fpu_mem_reg_mem ); 10534 %} 10535 // 10536 // This instruction does not round to 24-bits 10537 // Cisc-alternate to reg-reg multiply 10538 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10539 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10540 match(Set dst (MulF src1 (LoadF src2))); 10541 10542 format %{ "FMUL $dst,$src1,$src2" %} 10543 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10544 ins_encode( 
Opcode(tertiary), RMopc_Mem(0x00,src2), 10545 OpcReg_FPR(src1), 10546 Pop_Reg_FPR(dst) ); 10547 ins_pipe( fpu_reg_reg_mem ); 10548 %} 10549 10550 // Spill to obtain 24-bit precision 10551 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10552 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10553 match(Set dst (MulF src1 src2)); 10554 10555 format %{ "FMUL $dst,$src1,$src2" %} 10556 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10557 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10558 set_instruction_start, 10559 OpcP, RMopc_Mem(secondary,src1), 10560 Pop_Mem_FPR(dst) ); 10561 ins_pipe( fpu_mem_mem_mem ); 10562 %} 10563 10564 // Spill to obtain 24-bit precision 10565 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10566 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10567 match(Set dst (MulF src con)); 10568 10569 format %{ "FLD $src\n\t" 10570 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10571 "FSTP_S $dst" %} 10572 ins_encode %{ 10573 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10574 __ fmul_s($constantaddress($con)); 10575 __ fstp_s(Address(rsp, $dst$$disp)); 10576 %} 10577 ins_pipe(fpu_mem_reg_con); 10578 %} 10579 // 10580 // This instruction does not round to 24-bits 10581 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10582 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10583 match(Set dst (MulF src con)); 10584 10585 format %{ "FLD $src\n\t" 10586 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10587 "FSTP $dst" %} 10588 ins_encode %{ 10589 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10590 __ fmul_s($constantaddress($con)); 10591 __ fstp_d($dst$$reg); 10592 %} 10593 ins_pipe(fpu_reg_reg_con); 10594 %} 10595 10596 10597 // 10598 // MACRO1 -- subsume unshared load into mulFPR 10599 // This instruction does not round to 24-bits 10600 instruct mulFPR_reg_load1(regFPR dst, regFPR src, 
memory mem1 ) %{ 10601 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10602 match(Set dst (MulF (LoadF mem1) src)); 10603 10604 format %{ "FLD $mem1 ===MACRO1===\n\t" 10605 "FMUL ST,$src\n\t" 10606 "FSTP $dst" %} 10607 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10608 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10609 OpcReg_FPR(src), 10610 Pop_Reg_FPR(dst) ); 10611 ins_pipe( fpu_reg_reg_mem ); 10612 %} 10613 // 10614 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10615 // This instruction does not round to 24-bits 10616 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10617 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10618 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10619 ins_cost(95); 10620 10621 format %{ "FLD $mem1 ===MACRO2===\n\t" 10622 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10623 "FADD ST,$src2\n\t" 10624 "FSTP $dst" %} 10625 opcode(0xD9); /* LoadF D9 /0 */ 10626 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10627 FMul_ST_reg(src1), 10628 FAdd_ST_reg(src2), 10629 Pop_Reg_FPR(dst) ); 10630 ins_pipe( fpu_reg_mem_reg_reg ); 10631 %} 10632 10633 // MACRO3 -- addFPR a mulFPR 10634 // This instruction does not round to 24-bits. It is a '2-address' 10635 // instruction in that the result goes back to src2. This eliminates 10636 // a move from the macro; possibly the register allocator will have 10637 // to add it back (and maybe not). 
10638 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ 10639 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10640 match(Set src2 (AddF (MulF src0 src1) src2)); 10641 10642 format %{ "FLD $src0 ===MACRO3===\n\t" 10643 "FMUL ST,$src1\n\t" 10644 "FADDP $src2,ST" %} 10645 opcode(0xD9); /* LoadF D9 /0 */ 10646 ins_encode( Push_Reg_FPR(src0), 10647 FMul_ST_reg(src1), 10648 FAddP_reg_ST(src2) ); 10649 ins_pipe( fpu_reg_reg_reg ); 10650 %} 10651 10652 // MACRO4 -- divFPR subFPR 10653 // This instruction does not round to 24-bits 10654 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ 10655 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10656 match(Set dst (DivF (SubF src2 src1) src3)); 10657 10658 format %{ "FLD $src2 ===MACRO4===\n\t" 10659 "FSUB ST,$src1\n\t" 10660 "FDIV ST,$src3\n\t" 10661 "FSTP $dst" %} 10662 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10663 ins_encode( Push_Reg_FPR(src2), 10664 subFPR_divFPR_encode(src1,src3), 10665 Pop_Reg_FPR(dst) ); 10666 ins_pipe( fpu_reg_reg_reg_reg ); 10667 %} 10668 10669 // Spill to obtain 24-bit precision 10670 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10671 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10672 match(Set dst (DivF src1 src2)); 10673 10674 format %{ "FDIV $dst,$src1,$src2" %} 10675 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 10676 ins_encode( Push_Reg_FPR(src1), 10677 OpcReg_FPR(src2), 10678 Pop_Mem_FPR(dst) ); 10679 ins_pipe( fpu_mem_reg_reg ); 10680 %} 10681 // 10682 // This instruction does not round to 24-bits 10683 instruct divFPR_reg(regFPR dst, regFPR src) %{ 10684 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10685 match(Set dst (DivF dst src)); 10686 10687 format %{ "FDIV $dst,$src" %} 10688 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10689 ins_encode( Push_Reg_FPR(src), 10690 OpcP, RegOpc(dst) ); 10691 ins_pipe( fpu_reg_reg ); 10692 %} 10693 10694 10695 
// Spill to obtain 24-bit precision 10696 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10697 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10698 match(Set dst (ModF src1 src2)); 10699 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10700 10701 format %{ "FMOD $dst,$src1,$src2" %} 10702 ins_encode( Push_Reg_Mod_DPR(src1, src2), 10703 emitModDPR(), 10704 Push_Result_Mod_DPR(src2), 10705 Pop_Mem_FPR(dst)); 10706 ins_pipe( pipe_slow ); 10707 %} 10708 // 10709 // This instruction does not round to 24-bits 10710 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ 10711 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10712 match(Set dst (ModF dst src)); 10713 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10714 10715 format %{ "FMOD $dst,$src" %} 10716 ins_encode(Push_Reg_Mod_DPR(dst, src), 10717 emitModDPR(), 10718 Push_Result_Mod_DPR(src), 10719 Pop_Reg_FPR(dst)); 10720 ins_pipe( pipe_slow ); 10721 %} 10722 10723 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ 10724 predicate(UseSSE>=1); 10725 match(Set dst (ModF src0 src1)); 10726 effect(KILL rax, KILL cr); 10727 format %{ "SUB ESP,4\t # FMOD\n" 10728 "\tMOVSS [ESP+0],$src1\n" 10729 "\tFLD_S [ESP+0]\n" 10730 "\tMOVSS [ESP+0],$src0\n" 10731 "\tFLD_S [ESP+0]\n" 10732 "loop:\tFPREM\n" 10733 "\tFWAIT\n" 10734 "\tFNSTSW AX\n" 10735 "\tSAHF\n" 10736 "\tJP loop\n" 10737 "\tFSTP_S [ESP+0]\n" 10738 "\tMOVSS $dst,[ESP+0]\n" 10739 "\tADD ESP,4\n" 10740 "\tFSTP ST0\t # Restore FPU Stack" 10741 %} 10742 ins_cost(250); 10743 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); 10744 ins_pipe( pipe_slow ); 10745 %} 10746 10747 10748 //----------Arithmetic Conversion Instructions--------------------------------- 10749 // The conversions operations are all Alpha sorted. Please keep it that way! 
10750 10751 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ 10752 predicate(UseSSE==0); 10753 match(Set dst (RoundFloat src)); 10754 ins_cost(125); 10755 format %{ "FST_S $dst,$src\t# F-round" %} 10756 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 10757 ins_pipe( fpu_mem_reg ); 10758 %} 10759 10760 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ 10761 predicate(UseSSE<=1); 10762 match(Set dst (RoundDouble src)); 10763 ins_cost(125); 10764 format %{ "FST_D $dst,$src\t# D-round" %} 10765 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 10766 ins_pipe( fpu_mem_reg ); 10767 %} 10768 10769 // Force rounding to 24-bit precision and 6-bit exponent 10770 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ 10771 predicate(UseSSE==0); 10772 match(Set dst (ConvD2F src)); 10773 format %{ "FST_S $dst,$src\t# F-round" %} 10774 expand %{ 10775 roundFloat_mem_reg(dst,src); 10776 %} 10777 %} 10778 10779 // Force rounding to 24-bit precision and 6-bit exponent 10780 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ 10781 predicate(UseSSE==1); 10782 match(Set dst (ConvD2F src)); 10783 effect( KILL cr ); 10784 format %{ "SUB ESP,4\n\t" 10785 "FST_S [ESP],$src\t# F-round\n\t" 10786 "MOVSS $dst,[ESP]\n\t" 10787 "ADD ESP,4" %} 10788 ins_encode %{ 10789 __ subptr(rsp, 4); 10790 if ($src$$reg != FPR1L_enc) { 10791 __ fld_s($src$$reg-1); 10792 __ fstp_s(Address(rsp, 0)); 10793 } else { 10794 __ fst_s(Address(rsp, 0)); 10795 } 10796 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 10797 __ addptr(rsp, 4); 10798 %} 10799 ins_pipe( pipe_slow ); 10800 %} 10801 10802 // Force rounding double precision to single precision 10803 instruct convD2F_reg(regF dst, regD src) %{ 10804 predicate(UseSSE>=2); 10805 match(Set dst (ConvD2F src)); 10806 format %{ "CVTSD2SS $dst,$src\t# F-round" %} 10807 ins_encode %{ 10808 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); 10809 %} 10810 ins_pipe( pipe_slow ); 10811 %} 10812 10813 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ 
10814 predicate(UseSSE==0); 10815 match(Set dst (ConvF2D src)); 10816 format %{ "FST_S $dst,$src\t# D-round" %} 10817 ins_encode( Pop_Reg_Reg_DPR(dst, src)); 10818 ins_pipe( fpu_reg_reg ); 10819 %} 10820 10821 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ 10822 predicate(UseSSE==1); 10823 match(Set dst (ConvF2D src)); 10824 format %{ "FST_D $dst,$src\t# D-round" %} 10825 expand %{ 10826 roundDouble_mem_reg(dst,src); 10827 %} 10828 %} 10829 10830 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ 10831 predicate(UseSSE==1); 10832 match(Set dst (ConvF2D src)); 10833 effect( KILL cr ); 10834 format %{ "SUB ESP,4\n\t" 10835 "MOVSS [ESP] $src\n\t" 10836 "FLD_S [ESP]\n\t" 10837 "ADD ESP,4\n\t" 10838 "FSTP $dst\t# D-round" %} 10839 ins_encode %{ 10840 __ subptr(rsp, 4); 10841 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10842 __ fld_s(Address(rsp, 0)); 10843 __ addptr(rsp, 4); 10844 __ fstp_d($dst$$reg); 10845 %} 10846 ins_pipe( pipe_slow ); 10847 %} 10848 10849 instruct convF2D_reg(regD dst, regF src) %{ 10850 predicate(UseSSE>=2); 10851 match(Set dst (ConvF2D src)); 10852 format %{ "CVTSS2SD $dst,$src\t# D-round" %} 10853 ins_encode %{ 10854 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); 10855 %} 10856 ins_pipe( pipe_slow ); 10857 %} 10858 10859 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
// Convert a double (x87 FPU register) to an int.  The FIST store is done in
// truncating rounding mode; 0x80000000 is the x87 "integer indefinite"
// result for overflow/NaN, so on seeing it the value is re-loaded and the
// d2i_wrapper stub computes the Java-correct result.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSD2SI produces 0x80000000 for overflow/NaN; only that value
    // triggers the slow path through the d2i_wrapper stub.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Hand the argument to the stub on the x87 stack, bounced through memory.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double (x87 FPU register) to a long via a truncating 64-bit
// FIST store.  EDX:EAX == 0x8000000000000000 (min_jlong) is the
// overflow/NaN sentinel and routes to the d2l_wrapper stub.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // Bounce the XMM value through memory onto the x87 stack, then do a
    // truncating 64-bit integer store into the same 8-byte scratch area.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // Only the min_jlong sentinel (overflow/NaN) takes the stub slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a float to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
// SSE fast path: CVTTSS2SI truncates directly; 0x80000000 is the
// overflow/NaN sentinel and routes to the shared wrapper stub.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // The slow-path argument is passed on the x87 stack (FLD_S widens the
    // single to x87 format), so the double wrapper stub handles floats too.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a float (x87 FPU register) to a long via a truncating 64-bit
// FIST store; min_jlong in EDX:EAX signals overflow/NaN and takes the
// d2l_wrapper slow path.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
// SSE float->long: bounce the XMM single through memory onto the x87
// stack and do a truncating 64-bit FIST store.  min_jlong in EDX:EAX is
// the overflow/NaN sentinel and routes to the d2l_wrapper stub.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // 8 bytes of scratch: the 4-byte single is stored low, and the
    // subsequent FISTP overwrites the whole area with the 64-bit result.
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // Only the min_jlong sentinel (overflow/NaN) takes the stub slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Slow-path argument goes on the x87 stack (only 4 bytes of scratch
    // needed for the single here).
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// int (stack slot) -> double on the x87 stack; every int is exactly
// representable as a double, so no rounding correction is required.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// int (gpr) -> double in xmm via CVTSI2SD.
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand form: fold the LoadI into the conversion.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Alternative i2d selected by UseXmmI2D: move the int into the xmm
// register and convert with the packed CVTDQ2PD form.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Memory-operand i2d on the x87 stack; excluded in 24-bit mode
// (select_24_bit_instr) where the result would need a rounding store.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// The predicate pattern-matches ConvI2F(AndI x, 255): a byte-sized input
// always fits exactly in a 24-bit float mantissa, so no rounding store is
// needed even in 24-bit mode.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
// NOTE: the predicate relies on && binding tighter than ||, i.e.
// UseSSE==1 || (UseSSE>=2 && !UseXmmI2F).
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Alternative i2f selected by UseXmmI2F: move the int into the xmm
// register and convert with the packed CVTDQ2PS form.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend convert int to long: copy to both halves, then arithmetic
// shift propagates the sign into the high word.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// long -> double (x87): push both halves, FILD the 64-bit integer from
// the stack, and round the result through the destination stack slot.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> double in xmm: x87 does the 64-bit integer load, then the
// rounded result is moved into the xmm register through memory.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// long -> float in xmm: analogous to convL2D_reg, with a single-precision
// rounding store (FSTP_S) before the move into xmm.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// long -> float (x87 fallback, no predicate): FSTP_S to the destination
// slot performs the required F-rounding.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> int: simply take the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// MoveF2I: reinterpret float bits as int (no conversion).  Stack-slot
// source -> integer register.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// x87 float register -> int stack slot (raw bit store).
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// xmm float register -> int stack slot (raw bit store).
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// xmm float register -> integer register directly via MOVD.
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// MoveI2F: reinterpret int bits as float.  Integer register -> float
// stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// int stack slot -> x87 float register (raw bit load via FLD_S).
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// int stack slot -> xmm float register (raw bit load).
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// integer register -> xmm float register directly via MOVD.
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// MoveD2L: reinterpret double bits as long.  Double stack slot -> long
// register pair (two 32-bit loads).
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// x87 double register -> long stack slot (raw 64-bit store).
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// xmm double register -> long stack slot (raw 64-bit store).
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// xmm double -> long register pair without touching memory: MOVD the low
// word, shuffle the high word down with PSHUFLW, MOVD it out.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    // 0x4e swaps the two 32-bit halves into $tmp's low doubleword.
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// MoveL2D: reinterpret long bits as double.  Long register pair ->
// double stack slot (two 32-bit stores).
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// long stack slot -> x87 double register (raw 64-bit load via FLD_D).
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// long stack slot -> xmm double (full-register MOVSD load, clears the
// upper half; selected when UseXmmLoadAndClearUpper).
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Same as above for !UseXmmLoadAndClearUpper.  NOTE(review): the format
// shows MOVLPD while the encoding calls movdbl() — presumably movdbl()
// emits MOVLPD in this configuration; confirm against MacroAssembler::movdbl.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// long register pair -> xmm double without touching memory: MOVD each
// half into xmm registers and interleave with PUNPCKLDQ.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large == false: clear_mem emits both the short inline loop and
    // the large-array path behind the InitArrayShortSize check.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

// Large-array variant of ClearArray (node's is_large() is set): goes
// straight to the bulk path in clear_mem.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, both operands Latin-1 (byte[]).
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, both operands UTF-16 (char[]).
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, first operand Latin-1, second UTF-16.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, first operand UTF-16, second Latin-1.  Note the
// operands (and the register assignment above) are swapped when calling
// string_compare, which receives them as (str2, str1, cnt2, cnt1).
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}

  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
11821 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11822 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11823 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11824 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11825 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11826 11827 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11828 ins_encode %{ 11829 int icnt2 = (int)$int_cnt2$$constant; 11830 if (icnt2 >= 8) { 11831 // IndexOf for constant substrings with size >= 8 elements 11832 // which don't need to be loaded through stack. 11833 __ string_indexofC8($str1$$Register, $str2$$Register, 11834 $cnt1$$Register, $cnt2$$Register, 11835 icnt2, $result$$Register, 11836 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11837 } else { 11838 // Small strings are loaded through stack if they cross page boundary. 
11839 __ string_indexof($str1$$Register, $str2$$Register, 11840 $cnt1$$Register, $cnt2$$Register, 11841 icnt2, $result$$Register, 11842 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11843 } 11844 %} 11845 ins_pipe( pipe_slow ); 11846 %} 11847 11848 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11849 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11850 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11851 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11852 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11853 11854 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11855 ins_encode %{ 11856 __ string_indexof($str1$$Register, $str2$$Register, 11857 $cnt1$$Register, $cnt2$$Register, 11858 (-1), $result$$Register, 11859 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11860 %} 11861 ins_pipe( pipe_slow ); 11862 %} 11863 11864 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11865 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11866 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11867 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11868 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11869 11870 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11871 ins_encode %{ 11872 __ string_indexof($str1$$Register, $str2$$Register, 11873 $cnt1$$Register, $cnt2$$Register, 11874 (-1), $result$$Register, 11875 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11876 %} 11877 ins_pipe( pipe_slow ); 11878 %} 11879 11880 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11881 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11882 
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11883 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11884 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11885 11886 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11887 ins_encode %{ 11888 __ string_indexof($str1$$Register, $str2$$Register, 11889 $cnt1$$Register, $cnt2$$Register, 11890 (-1), $result$$Register, 11891 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11892 %} 11893 ins_pipe( pipe_slow ); 11894 %} 11895 11896 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, 11897 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ 11898 predicate(UseSSE42Intrinsics); 11899 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); 11900 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); 11901 format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} 11902 ins_encode %{ 11903 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 11904 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 11905 %} 11906 ins_pipe( pipe_slow ); 11907 %} 11908 11909 // fast array equals 11910 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11911 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11912 %{ 11913 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 11914 match(Set result (AryEq ary1 ary2)); 11915 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11916 //ins_cost(300); 11917 11918 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11919 ins_encode %{ 11920 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11921 $tmp3$$Register, $result$$Register, $tmp4$$Register, 
11922 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */); 11923 %} 11924 ins_pipe( pipe_slow ); 11925 %} 11926 11927 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11928 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11929 %{ 11930 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 11931 match(Set result (AryEq ary1 ary2)); 11932 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11933 //ins_cost(300); 11934 11935 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11936 ins_encode %{ 11937 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11938 $tmp3$$Register, $result$$Register, $tmp4$$Register, 11939 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */); 11940 %} 11941 ins_pipe( pipe_slow ); 11942 %} 11943 11944 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result, 11945 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) 11946 %{ 11947 match(Set result (HasNegatives ary1 len)); 11948 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 11949 11950 format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11951 ins_encode %{ 11952 __ has_negatives($ary1$$Register, $len$$Register, 11953 $result$$Register, $tmp3$$Register, 11954 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11955 %} 11956 ins_pipe( pipe_slow ); 11957 %} 11958 11959 // fast char[] to byte[] compression 11960 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11961 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11962 match(Set result (StrCompressedCopy src (Binary dst len))); 11963 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11964 11965 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 11966 ins_encode %{ 11967 __ 
char_array_compress($src$$Register, $dst$$Register, $len$$Register, 11968 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11969 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11970 %} 11971 ins_pipe( pipe_slow ); 11972 %} 11973 11974 // fast byte[] to char[] inflation 11975 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 11976 regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ 11977 match(Set dummy (StrInflatedCopy src (Binary dst len))); 11978 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 11979 11980 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 11981 ins_encode %{ 11982 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, 11983 $tmp1$$XMMRegister, $tmp2$$Register); 11984 %} 11985 ins_pipe( pipe_slow ); 11986 %} 11987 11988 // encode char[] to byte[] in ISO_8859_1 11989 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 11990 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11991 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11992 match(Set result (EncodeISOArray src (Binary dst len))); 11993 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11994 11995 format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 11996 ins_encode %{ 11997 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 11998 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11999 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 12000 %} 12001 ins_pipe( pipe_slow ); 12002 %} 12003 12004 12005 //----------Control Flow Instructions------------------------------------------ 12006 // Signed compare Instructions 12007 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ 12008 match(Set cr (CmpI op1 op2)); 12009 effect( DEF cr, USE op1, USE op2 ); 12010 format %{ "CMP $op1,$op2" %} 12011 opcode(0x3B); /* Opcode 3B /r */ 12012 
ins_encode( OpcP, RegReg( op1, op2) ); 12013 ins_pipe( ialu_cr_reg_reg ); 12014 %} 12015 12016 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 12017 match(Set cr (CmpI op1 op2)); 12018 effect( DEF cr, USE op1 ); 12019 format %{ "CMP $op1,$op2" %} 12020 opcode(0x81,0x07); /* Opcode 81 /7 */ 12021 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 12022 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 12023 ins_pipe( ialu_cr_reg_imm ); 12024 %} 12025 12026 // Cisc-spilled version of cmpI_eReg 12027 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 12028 match(Set cr (CmpI op1 (LoadI op2))); 12029 12030 format %{ "CMP $op1,$op2" %} 12031 ins_cost(500); 12032 opcode(0x3B); /* Opcode 3B /r */ 12033 ins_encode( OpcP, RegMem( op1, op2) ); 12034 ins_pipe( ialu_cr_reg_mem ); 12035 %} 12036 12037 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{ 12038 match(Set cr (CmpI src zero)); 12039 effect( DEF cr, USE src ); 12040 12041 format %{ "TEST $src,$src" %} 12042 opcode(0x85); 12043 ins_encode( OpcP, RegReg( src, src ) ); 12044 ins_pipe( ialu_cr_reg_imm ); 12045 %} 12046 12047 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{ 12048 match(Set cr (CmpI (AndI src con) zero)); 12049 12050 format %{ "TEST $src,$con" %} 12051 opcode(0xF7,0x00); 12052 ins_encode( OpcP, RegOpc(src), Con32(con) ); 12053 ins_pipe( ialu_cr_reg_imm ); 12054 %} 12055 12056 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{ 12057 match(Set cr (CmpI (AndI src mem) zero)); 12058 12059 format %{ "TEST $src,$mem" %} 12060 opcode(0x85); 12061 ins_encode( OpcP, RegMem( src, mem ) ); 12062 ins_pipe( ialu_cr_reg_mem ); 12063 %} 12064 12065 // Unsigned compare Instructions; really, same as signed except they 12066 // produce an eFlagsRegU instead of eFlagsReg. 
// Unsigned register-register compare. Identical CMP encoding to the signed
// version; the difference is only the eFlagsRegU result type, which restricts
// which branch conditions the matcher may pair with it.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Unsigned compare register with immediate (8- or 32-bit form chosen by Con8or32).
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu   $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare against zero via TEST src,src (sets Z without an immediate).
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu  $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Pointer compare with pointer immediate.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu   $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
// NOTE(review): same match rule as compP_eReg_mem above but with a default
// ins_cost, so this cheaper form is preferred when the predicate holds.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST   $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// Compare a pointer loaded from memory against NULL without needing a register
// for the pointer: TEST mem,0xFFFFFFFF sets Z iff the loaded value is zero.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST   $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
// The real instruction bytes come from the min_enc encoding class; the 0xCC
// opcode here is presumably just a placeholder (never emitted directly).
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN    $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
// Like minI_eReg above: the actual bytes come from the max_enc encoding
// class; 0xCC is presumably only a placeholder opcode.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX    $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
//
// Computes limit = init + stride * ((limit - init + stride - 1) / stride)
// in 64-bit precision using the EAX:EDX register pair (hence the fixed
// eAXRegI/eDXRegI operands required by CDQ/IDIV/MUL).
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 is computed but never referenced below — looks like
    // leftover from an earlier version; confirm before removing.
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table: indirect jump through the constant table, indexed by switch_val.
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP    $labl" %}
  size(5);  // E9 + rel32
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a
relative address from Jcc+1 12348 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{ 12349 match(If cop cr); 12350 effect(USE labl); 12351 12352 ins_cost(300); 12353 format %{ "J$cop $labl" %} 12354 size(6); 12355 ins_encode %{ 12356 Label* L = $labl$$label; 12357 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12358 %} 12359 ins_pipe( pipe_jcc ); 12360 %} 12361 12362 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12363 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{ 12364 predicate(!n->has_vector_mask_set()); 12365 match(CountedLoopEnd cop cr); 12366 effect(USE labl); 12367 12368 ins_cost(300); 12369 format %{ "J$cop $labl\t# Loop end" %} 12370 size(6); 12371 ins_encode %{ 12372 Label* L = $labl$$label; 12373 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12374 %} 12375 ins_pipe( pipe_jcc ); 12376 %} 12377 12378 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12379 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12380 predicate(!n->has_vector_mask_set()); 12381 match(CountedLoopEnd cop cmp); 12382 effect(USE labl); 12383 12384 ins_cost(300); 12385 format %{ "J$cop,u $labl\t# Loop end" %} 12386 size(6); 12387 ins_encode %{ 12388 Label* L = $labl$$label; 12389 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12390 %} 12391 ins_pipe( pipe_jcc ); 12392 %} 12393 12394 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12395 predicate(!n->has_vector_mask_set()); 12396 match(CountedLoopEnd cop cmp); 12397 effect(USE labl); 12398 12399 ins_cost(200); 12400 format %{ "J$cop,u $labl\t# Loop end" %} 12401 size(6); 12402 ins_encode %{ 12403 Label* L = $labl$$label; 12404 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12405 %} 12406 ins_pipe( pipe_jcc ); 12407 %} 12408 12409 // mask version 12410 // Jump Direct Conditional - Label defines a relative address 
from Jcc+1 12411 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{ 12412 predicate(n->has_vector_mask_set()); 12413 match(CountedLoopEnd cop cr); 12414 effect(USE labl); 12415 12416 ins_cost(400); 12417 format %{ "J$cop $labl\t# Loop end\n\t" 12418 "restorevectmask \t# vector mask restore for loops" %} 12419 size(10); 12420 ins_encode %{ 12421 Label* L = $labl$$label; 12422 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12423 __ restorevectmask(); 12424 %} 12425 ins_pipe( pipe_jcc ); 12426 %} 12427 12428 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12429 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12430 predicate(n->has_vector_mask_set()); 12431 match(CountedLoopEnd cop cmp); 12432 effect(USE labl); 12433 12434 ins_cost(400); 12435 format %{ "J$cop,u $labl\t# Loop end\n\t" 12436 "restorevectmask \t# vector mask restore for loops" %} 12437 size(10); 12438 ins_encode %{ 12439 Label* L = $labl$$label; 12440 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12441 __ restorevectmask(); 12442 %} 12443 ins_pipe( pipe_jcc ); 12444 %} 12445 12446 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12447 predicate(n->has_vector_mask_set()); 12448 match(CountedLoopEnd cop cmp); 12449 effect(USE labl); 12450 12451 ins_cost(300); 12452 format %{ "J$cop,u $labl\t# Loop end\n\t" 12453 "restorevectmask \t# vector mask restore for loops" %} 12454 size(10); 12455 ins_encode %{ 12456 Label* L = $labl$$label; 12457 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12458 __ restorevectmask(); 12459 %} 12460 ins_pipe( pipe_jcc ); 12461 %} 12462 12463 // Jump Direct Conditional - using unsigned comparison 12464 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12465 match(If cop cmp); 12466 effect(USE labl); 12467 12468 ins_cost(300); 12469 format %{ "J$cop,u $labl" %} 
12470 size(6); 12471 ins_encode %{ 12472 Label* L = $labl$$label; 12473 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12474 %} 12475 ins_pipe(pipe_jcc); 12476 %} 12477 12478 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12479 match(If cop cmp); 12480 effect(USE labl); 12481 12482 ins_cost(200); 12483 format %{ "J$cop,u $labl" %} 12484 size(6); 12485 ins_encode %{ 12486 Label* L = $labl$$label; 12487 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12488 %} 12489 ins_pipe(pipe_jcc); 12490 %} 12491 12492 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12493 match(If cop cmp); 12494 effect(USE labl); 12495 12496 ins_cost(200); 12497 format %{ $$template 12498 if ($cop$$cmpcode == Assembler::notEqual) { 12499 $$emit$$"JP,u $labl\n\t" 12500 $$emit$$"J$cop,u $labl" 12501 } else { 12502 $$emit$$"JP,u done\n\t" 12503 $$emit$$"J$cop,u $labl\n\t" 12504 $$emit$$"done:" 12505 } 12506 %} 12507 ins_encode %{ 12508 Label* l = $labl$$label; 12509 if ($cop$$cmpcode == Assembler::notEqual) { 12510 __ jcc(Assembler::parity, *l, false); 12511 __ jcc(Assembler::notEqual, *l, false); 12512 } else if ($cop$$cmpcode == Assembler::equal) { 12513 Label done; 12514 __ jccb(Assembler::parity, done); 12515 __ jcc(Assembler::equal, *l, false); 12516 __ bind(done); 12517 } else { 12518 ShouldNotReachHere(); 12519 } 12520 %} 12521 ins_pipe(pipe_jcc); 12522 %} 12523 12524 // ============================================================================ 12525 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass 12526 // array for an instance of the superklass. Set a hidden internal cache on a 12527 // hit (cache is checked with exposed code in gen_subtype_check()). Return 12528 // NZ for a miss or zero for a hit. The encoding ALSO sets flags. 
// Scan the subklass's secondary-supers array for super, caching a hit.
// Result register (EDI) is zeroed on a hit, left non-zero on a miss.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR    $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant matched when the check result is compared directly against NULL:
// only the flags are needed, so the XOR of EDI is skipped (opcode 0x0).
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s  miss\t\t# Missed: flags NZ\n\t"
            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged 12571 // with the ins_short_branch attribute, which causes the ADLC to suppress the 12572 // match rules in general matching. Instead, the ADLC generates a conversion 12573 // method in the MachNode which can be used to do in-place replacement of the 12574 // long variant with the shorter variant. The compiler will determine if a 12575 // branch can be taken by the is_short_branch_offset() predicate in the machine 12576 // specific code section of the file. 12577 12578 // Jump Direct - Label defines a relative address from JMP+1 12579 instruct jmpDir_short(label labl) %{ 12580 match(Goto); 12581 effect(USE labl); 12582 12583 ins_cost(300); 12584 format %{ "JMP,s $labl" %} 12585 size(2); 12586 ins_encode %{ 12587 Label* L = $labl$$label; 12588 __ jmpb(*L); 12589 %} 12590 ins_pipe( pipe_jmp ); 12591 ins_short_branch(1); 12592 %} 12593 12594 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12595 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{ 12596 match(If cop cr); 12597 effect(USE labl); 12598 12599 ins_cost(300); 12600 format %{ "J$cop,s $labl" %} 12601 size(2); 12602 ins_encode %{ 12603 Label* L = $labl$$label; 12604 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12605 %} 12606 ins_pipe( pipe_jcc ); 12607 ins_short_branch(1); 12608 %} 12609 12610 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12611 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{ 12612 match(CountedLoopEnd cop cr); 12613 effect(USE labl); 12614 12615 ins_cost(300); 12616 format %{ "J$cop,s $labl\t# Loop end" %} 12617 size(2); 12618 ins_encode %{ 12619 Label* L = $labl$$label; 12620 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12621 %} 12622 ins_pipe( pipe_jcc ); 12623 ins_short_branch(1); 12624 %} 12625 12626 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12627 instruct jmpLoopEndU_short(cmpOpU cop, 
eFlagsRegU cmp, label labl) %{ 12628 match(CountedLoopEnd cop cmp); 12629 effect(USE labl); 12630 12631 ins_cost(300); 12632 format %{ "J$cop,us $labl\t# Loop end" %} 12633 size(2); 12634 ins_encode %{ 12635 Label* L = $labl$$label; 12636 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12637 %} 12638 ins_pipe( pipe_jcc ); 12639 ins_short_branch(1); 12640 %} 12641 12642 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12643 match(CountedLoopEnd cop cmp); 12644 effect(USE labl); 12645 12646 ins_cost(300); 12647 format %{ "J$cop,us $labl\t# Loop end" %} 12648 size(2); 12649 ins_encode %{ 12650 Label* L = $labl$$label; 12651 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12652 %} 12653 ins_pipe( pipe_jcc ); 12654 ins_short_branch(1); 12655 %} 12656 12657 // Jump Direct Conditional - using unsigned comparison 12658 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12659 match(If cop cmp); 12660 effect(USE labl); 12661 12662 ins_cost(300); 12663 format %{ "J$cop,us $labl" %} 12664 size(2); 12665 ins_encode %{ 12666 Label* L = $labl$$label; 12667 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12668 %} 12669 ins_pipe( pipe_jcc ); 12670 ins_short_branch(1); 12671 %} 12672 12673 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12674 match(If cop cmp); 12675 effect(USE labl); 12676 12677 ins_cost(300); 12678 format %{ "J$cop,us $labl" %} 12679 size(2); 12680 ins_encode %{ 12681 Label* L = $labl$$label; 12682 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12683 %} 12684 ins_pipe( pipe_jcc ); 12685 ins_short_branch(1); 12686 %} 12687 12688 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12689 match(If cop cmp); 12690 effect(USE labl); 12691 12692 ins_cost(300); 12693 format %{ $$template 12694 if ($cop$$cmpcode == Assembler::notEqual) { 12695 $$emit$$"JP,u,s $labl\n\t" 12696 $$emit$$"J$cop,u,s $labl" 12697 } else { 12698 $$emit$$"JP,u,s done\n\t" 12699 
$$emit$$"J$cop,u,s $labl\n\t" 12700 $$emit$$"done:" 12701 } 12702 %} 12703 size(4); 12704 ins_encode %{ 12705 Label* l = $labl$$label; 12706 if ($cop$$cmpcode == Assembler::notEqual) { 12707 __ jccb(Assembler::parity, *l); 12708 __ jccb(Assembler::notEqual, *l); 12709 } else if ($cop$$cmpcode == Assembler::equal) { 12710 Label done; 12711 __ jccb(Assembler::parity, done); 12712 __ jccb(Assembler::equal, *l); 12713 __ bind(done); 12714 } else { 12715 ShouldNotReachHere(); 12716 } 12717 %} 12718 ins_pipe(pipe_jcc); 12719 ins_short_branch(1); 12720 %} 12721 12722 // ============================================================================ 12723 // Long Compare 12724 // 12725 // Currently we hold longs in 2 registers. Comparing such values efficiently 12726 // is tricky. The flavor of compare used depends on whether we are testing 12727 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit. 12728 // The GE test is the negated LT test. The LE test can be had by commuting 12729 // the operands (yielding a GE test) and then negating; negate again for the 12730 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the 12731 // NE test is negated from that. 12732 12733 // Due to a shortcoming in the ADLC, it mixes up expressions like: 12734 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the 12735 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections 12736 // are collapsed internally in the ADLC's dfa-gen code. The match for 12737 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the 12738 // foo match ends up with the wrong leaf. One fix is to not match both 12739 // reg-reg and reg-zero forms of long-compare. This is unfortunate because 12740 // both forms beat the trinary form of long-compare and both are very useful 12741 // on Intel which has so few registers. 12742 12743 // Manifest a CmpL result in an integer register. Very painful. 
// This is the test to avoid.
// Produces -1/0/+1 in dst for src1 <,==,> src2: compare high words signed,
// then low words unsigned, branching to increment/decrement a zeroed dst.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR    $dst,$dst\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
            "JLT,s  m_one\n\t"
            "JGT,s  p_one\n\t"
            "CMP    $src1.lo,$src2.lo\n\t"
            "JB,s   m_one\n\t"
            "JEQ,s  done\n"
    "p_one:\tINC    $dst\n\t"
            "JMP,s  done\n"
    "m_one:\tDEC    $dst\n"
    "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // High halves: signed comparison decides unless they are equal.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // High halves equal: low halves compare unsigned.
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Compare against zero only needs the sign of the high word: TEST hi,hi.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST   $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// CMP of the low halves followed by SBB of the high halves leaves the flags
// as if a full 64-bit subtract had been performed (signed LT/GE only).
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Both halves move under the same condition; requires CMOV hardware support.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Long compare (LT/GE flavor) feeding a conditional move of an int register.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above, but the source comes from memory.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Long compare (LT/GE flavor) feeding a conditional move of a pointer register.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 form, UseSSE<=1 only).
// FIX: '&&' binds tighter than '||', so without parentheses the predicate
// parsed as (UseSSE<=1 && lt) || ge, letting the BoolTest::ge arm match
// regardless of UseSSE.  The disjunction must be grouped, as it is in the
// cmovLL/cmovII/cmovPP rules above.  Same fix applied to the three rules
// below.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form, UseSSE>=2 only).
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, UseSSE==0 only).
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE form, UseSSE>=1 only).
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Against zero: OR the two halves; ZF is set iff the whole long is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Against zero: OR the two halves; ZF is set iff the whole long is zero.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Long compare (EQ/NE flavor) feeding a conditional move of a long register
// pair; both halves move under the same condition.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above, but the source long comes from memory.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Long compare (EQ/NE flavor) feeding a conditional move of an int register.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above, but the source comes from memory.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Long compare (EQ/NE flavor) feeding a conditional move of a pointer register.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 form, UseSSE<=1 only).
// FIX: '&&' binds tighter than '||'; without parentheses the predicate parsed
// as (UseSSE<=1 && eq) || ne, letting the BoolTest::ne arm match regardless
// of UseSSE.  Group the disjunction as the cmovLL/cmovII/cmovPP rules above
// do.  Same fix applied to the three rules below.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form, UseSSE>=2 only).
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, UseSSE==0 only).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE form, UseSSE>=1 only).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above, but the source long comes from memory.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Long compare (LE/GT flavor, commuted test) feeding a conditional move of an
// int register.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above, but the source comes from memory.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Long compare (LE/GT flavor) feeding a conditional move of a pointer register.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 form, UseSSE<=1 only).
// FIX: '&&' binds tighter than '||'; without parentheses the predicate parsed
// as (UseSSE<=1 && le) || gt, letting the BoolTest::gt arm match regardless
// of UseSSE.  Group the disjunction as the cmovLL/cmovII/cmovPP rules above
// do.  Same fix applied to the three rules below.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form, UseSSE>=2 only).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, UseSSE==0 only).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE form, UseSSE>=1 only).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // EAX is pre-loaded with a placeholder inline-cache value before the call.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that is known not to use the FPU; skips the float-stack cleanup.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // The popped return address is discarded into EDX.
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast lock using RTM (Restricted Transactional Memory) when enabled.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Fast lock without RTM; the RTM-only temps are passed as noreg.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  predicate(SafepointMechanism::uses_global_page_poll());
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or. By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$. Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic. It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}

// Thread-local safepoint poll: test against the per-thread poll word.
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  predicate(SafepointMechanism::uses_thread_local_poll());
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    // NOTE(review): post_pc is computed but not used below — presumably
    // intended for a size check mirroring the guarantee; confirm.
    address post_pc = __ pc();
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceeded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace (
leaI_eReg_immI( 0.dst 1.src 0.src ) ); 13590 // %} 13591 // 13592 // Implementation no longer uses movX instructions since 13593 // machine-independent system no longer uses CopyX nodes. 13594 // 13595 // peephole %{ 13596 // peepmatch ( incI_eReg movI ); 13597 // peepconstraint ( 0.dst == 1.dst ); 13598 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); 13599 // %} 13600 // 13601 // peephole %{ 13602 // peepmatch ( decI_eReg movI ); 13603 // peepconstraint ( 0.dst == 1.dst ); 13604 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); 13605 // %} 13606 // 13607 // peephole %{ 13608 // peepmatch ( addI_eReg_imm movI ); 13609 // peepconstraint ( 0.dst == 1.dst ); 13610 // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); 13611 // %} 13612 // 13613 // peephole %{ 13614 // peepmatch ( addP_eReg_imm movP ); 13615 // peepconstraint ( 0.dst == 1.dst ); 13616 // peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); 13617 // %} 13618 13619 // // Change load of spilled value to only a spill 13620 // instruct storeI(memory mem, rRegI src) %{ 13621 // match(Set mem (StoreI mem src)); 13622 // %} 13623 // 13624 // instruct loadI(rRegI dst, memory mem) %{ 13625 // match(Set dst (LoadI mem)); 13626 // %} 13627 // 13628 peephole %{ 13629 peepmatch ( loadI storeI ); 13630 peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); 13631 peepreplace ( storeI( 1.mem 1.mem 1.src ) ); 13632 %} 13633 13634 //----------SMARTSPILL RULES--------------------------------------------------- 13635 // These must follow all instruction definitions as they use the names 13636 // defined in the instructions definitions.