1 // 2 // Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // architecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 
//
// SOC = Save-On-Call: The register allocator assumes that these registers
//                     can be used without saving upon entry to the method,
//                     but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
//
// Each reg_def below carries a fifth argument after the encoding: the VMReg
// expression the definition is bound to (VMRegImpl::Bad() where there is none).

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.
// NOTE(review): as written, EBX/ESI/EDI are SOC in the first (register save
// type) column and SOE only in the second (C convention) column -- confirm the
// history comment above still describes the intended setting.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
// Consequently FPRn (encoding n) is bound to as_FloatRegister(n-1); the H half
// of each pair is the ->next() VMReg slot of the L half.
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
// The order of this list is significant; do not re-sort it.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
// All reg_class_dynamic definitions below follow the same argument order.
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_no_ebp and int_reg_with_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_no_ebp and ncx_reg_with_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_no_ebp and nabx_reg_with_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding the EBP,EDI pair)
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_no_ebp and long_reg_with_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and not EBP either), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_no_ebp and nadx_reg_with_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

// Double classes list both halves (L,H) of every register.
reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

// Singleton classes for FPR1 / FPR2, and every double register except FPR1.
reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
// Short names for the Assembler relocation operand formats.
#define RELOC_IMM32   Assembler::imm_operand
#define RELOC_DISP32  Assembler::disp32_operand

// "__ foo(...)" expands to "_masm.foo(...)"; a MacroAssembler named _masm
// must be in scope wherever the shorthand is used.
#define __ _masm.
258 259 // How to find the high register of a Long pair, given the low register 260 #define HIGH_FROM_LOW(x) ((x)+2) 261 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM 263 // instructions, to allow sign-masking or sign-bit flipping. They allow 264 // fast versions of NegF/NegD and AbsF/AbsD. 265 266 // Note: 'double' and 'long long' have 32-bits alignment on x86. 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { 268 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address 269 // of 128-bits operands for SSE instructions. 270 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF))); 271 // Store the value to a 128-bits operand. 272 operand[0] = lo; 273 operand[1] = hi; 274 return operand; 275 } 276 277 // Buffer for 128-bits masks used by SSE instructions. 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment) 279 280 // Static initialization during VM startup. 281 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF)); 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF)); 283 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000)); 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000)); 285 286 // Offset hacking within calls. 287 static int pre_call_resets_size() { 288 int size = 0; 289 Compile* C = Compile::current(); 290 if (C->in_24_bit_fp_mode()) { 291 size += 6; // fldcw 292 } 293 if (VM_Version::supports_vzeroupper()) { 294 size += 3; // vzeroupper 295 } 296 return size; 297 } 298 299 // !!!!! 
Special hack to get all type of calls to specify the byte offset 300 // from the start of the call to the point where the return address 301 // will point. 302 int MachCallStaticJavaNode::ret_addr_offset() { 303 return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points 304 } 305 306 int MachCallDynamicJavaNode::ret_addr_offset() { 307 return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points 308 } 309 310 static int sizeof_FFree_Float_Stack_All = -1; 311 312 int MachCallRuntimeNode::ret_addr_offset() { 313 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already"); 314 return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size(); 315 } 316 317 // Indicate if the safepoint node needs the polling page as an input. 318 // Since x86 does have absolute addressing, it doesn't. 319 bool SafePointNode::needs_polling_address_input() { 320 return SafepointMechanism::uses_thread_local_poll(); 321 } 322 323 // 324 // Compute padding required for nodes which need alignment 325 // 326 327 // The address of the call instruction needs to be 4-byte aligned to 328 // ensure that it does not span a cache line so that it can be patched. 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const { 330 current_offset += pre_call_resets_size(); // skip fldcw, if any 331 current_offset += 1; // skip call opcode byte 332 return align_up(current_offset, alignment_required()) - current_offset; 333 } 334 335 // The address of the call instruction needs to be 4-byte aligned to 336 // ensure that it does not span a cache line so that it can be patched. 
// Returns the padding needed so that the offset just past the pre-call resets,
// the 5-byte MOV and the call opcode byte is a multiple of alignment_required().
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw and/or vzeroupper, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emits one byte laid out as f1:2 | f2:3 | f3:3.  Used below for both the
// ModR/M byte (mod, reg, r/m) and the SIB byte (scale, index, base).
// The fields are not masked; callers pass in-range values.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emits the single byte f1 | f2.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
// Emits the low 8 bits of 'code'.
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
// Records a relocation of type 'reloc' at insts_mark() + offset, then emits
// the opcode byte.
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
// Emits the low 8 bits of d8.
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
// The relocation is recorded at insts_mark(), not at the current end of code.
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
// In ASSERT builds, an oop-typed relocation whose value is neither 0 nor the
// non-oop word is additionally checked to be a real, embeddable oop.
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop(d32))), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits "opcode /rm_field [ESP + disp]": opcode, ModR/M, SIB and either an
// 8-bit or a 32-bit displacement depending on whether disp fits in a byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // 8-bit displacement
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // 32-bit displacement
  }
}

   // rRegI ereg, memory mem) %{    // emit_reg_mem
// Emits the ModR/M byte, optional SIB byte and optional displacement for a
// register/memory operand.  The opcode itself is emitted by the caller.
//   reg_encoding - value for the ModR/M reg field
//   base         - base register encoding; -1 flags an absolute address
//   index/scale  - index == 0x4 with scale == 0 means "no index"
//   displace     - displacement; emitted as 8 bits only when it fits in a
//                  byte and carries no relocation
//   disp_reloc   - relocation type for the displacement, or relocInfo::none
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        // NOTE(review): the absolute-address flag is only tested on this
        // 32-bit path; a base of -1 with a zero or byte-sized unrelocated
        // displacement would take one of the branches above -- presumably
        // that never occurs; confirm.
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        // Both arms below emit the same two bytes (0x04 == base in the first).
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Emits a register-to-register copy (opcode 0x8B with a mod=3 ModR/M byte),
// or nothing at all when source and destination encodings are the same.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Post-processes the flags after a floating-point compare: when the parity
// flag is set, the saved flags word on the stack is masked so that the
// result reads as 'less than'.  Nothing is changed when parity is clear.
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materializes a three-way compare result in dst from the current flags:
// -1 when parity or below is set, otherwise the setb(notEqual) byte
// zero-extended (so 0 for equal, 1 otherwise).
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
// The constant base node produces no register and no code on this platform.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Prints a textual rendering of the method prolog.  Debug output only; the
// real code is produced by MachPrologNode::emit.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    // With a stack bang EBP is pushed, so one more word is already accounted for.
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    // Without a stack bang the whole frame is allocated first and EBP is
    // stored into its top word.
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


// Emits the method prolog via MacroAssembler::verified_entry, records the
// frame-complete offset and, when a constant base node exists, fixes the
// constant table base offset.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  // A bang size of 0 is passed when no stack bang is needed.
  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  // NOTE(review): the value is 0 although the original remark calls it
  // "a large enough number" -- confirm which is intended.
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Prints a textual rendering of the method epilog.  Debug output only; the
// real code is produced by MachEpilogNode::emit.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

// Emits the method epilog: optional vzeroupper, optional restore of the
// standard FPU control word, frame teardown, POP EBP, optional reserved-stack
// check and optional return safepoint poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  // Use the imm32 form (0x81) for sizes that do not fit a signed byte,
  // otherwise the shorter imm8 form (0x83); emit nothing for an empty frame.
  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  // POP EBP (0x58 + register encoding)
  emit_opcode(cbuf, 0x58 | EBP_enc);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    if (SafepointMechanism::uses_thread_local_poll()) {
      // Thread-local poll: load the polling page address from the current
      // thread into EBX and test through it.
      Register pollReg = as_Register(EBX_enc);
      MacroAssembler masm(&cbuf);
      masm.get_thread(pollReg);
      masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
      masm.relocate(relocInfo::poll_return_type);
      masm.testl(rax, Address(pollReg, 0));
    } else {
      // Global poll: TEST EAX against the absolute polling page address.
      cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
      emit_opcode(cbuf,0x85);
      emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
      emit_d32(cbuf, (intptr_t)os::get_polling_page());
    }
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  // NOTE(review): the value is 0 although the original remark calls it
  // "a large enough number" -- confirm which is intended.
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Coarse classification of an allocator register, used by the spill-copy code.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
// Maps an OptoReg to its RC: invalid -> rc_bad, stack slot -> rc_stack,
// general register -> rc_int, x87 register -> rc_float (asserted to be used
// only when UseSSE < 2), anything else must be an XMM register -> rc_xmm.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emits, prints or just sizes one "opcode reg,[ESP + offset]" style access.
//   cbuf != NULL - emit the instruction
//   else, !do_size (non-PRODUCT builds only) - print it to st
// Returns 'size' plus the instruction length: opcode + ModR/M + SIB plus
// 0, 1 or 4 displacement bytes.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // Only the upper bound is checked here -- presumably stack offsets are
  // never negative; confirm.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Emit, print or size an XMM <-> [ESP + offset] move.
// A double move is selected when reg_hi is the slot directly after reg_lo
// (reg_lo+1 == reg_hi); otherwise a single-precision move is used.
// Returns 'size' plus the length accounted for the instruction: 5 fixed bytes,
// 2 more when UseAVX > 2, plus 0/1/4 displacement bytes.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  // Operand size / W bit are only consumed by the compressed-displacement query below.
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    //                          it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // With UseAVX > 2 the displacement width is decided by the assembler's
  // compressed-displacement query instead of the plain "<= 127" test.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// Emit, print or size an XMM register-to-register move (double when both
// src and dst name adjacent lo/hi pairs, single otherwise).
// Returns 'size' plus 6 (UseAVX > 2), 3 (single move with
// UseXmmRegToRegMoveAll and UseAVX == 0) or 4 bytes.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Emit, print or size a 32-bit move from a general register to an XMM register.
// Note: the returned length (6 when UseAVX > 2, else 4) does not include the
// incoming 'size', and src_hi/dst_hi are not used.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// Emit, print or size a 32-bit move from an XMM register to a general register.
// Note: the returned length (6 when UseAVX > 2, else 4) does not include the
// incoming 'size', and src_hi/dst_hi are not used.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Emit, print or size an integer register-to-register MOV (opcode 0x8B with a
// register-direct ModRM byte). Returns 'size' + 2.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Emit, print or size a store of an x87 register to [ESP + offset].
// If the source is not FPR1L it is first pushed with FLD (2 bytes) and then
// stored with the popping form; otherwise the non-popping store is used.
// The store itself is delegated to impl_helper, with EBX_num / EDX_num passed
// as the 'reg' argument to select the store-and-pop / store-no-pop variant.
// Returns the accumulated size.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
956 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 957 int src_hi, int dst_hi, uint ireg, outputStream* st); 958 959 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 960 int stack_offset, int reg, uint ireg, outputStream* st); 961 962 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 963 int dst_offset, uint ireg, outputStream* st) { 964 int calc_size = 0; 965 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 966 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 967 switch (ireg) { 968 case Op_VecS: 969 calc_size = 3+src_offset_size + 3+dst_offset_size; 970 break; 971 case Op_VecD: { 972 calc_size = 3+src_offset_size + 3+dst_offset_size; 973 int tmp_src_offset = src_offset + 4; 974 int tmp_dst_offset = dst_offset + 4; 975 src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4); 976 dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 
1 : 4); 977 calc_size += 3+src_offset_size + 3+dst_offset_size; 978 break; 979 } 980 case Op_VecX: 981 case Op_VecY: 982 case Op_VecZ: 983 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 984 break; 985 default: 986 ShouldNotReachHere(); 987 } 988 if (cbuf) { 989 MacroAssembler _masm(cbuf); 990 int offset = __ offset(); 991 switch (ireg) { 992 case Op_VecS: 993 __ pushl(Address(rsp, src_offset)); 994 __ popl (Address(rsp, dst_offset)); 995 break; 996 case Op_VecD: 997 __ pushl(Address(rsp, src_offset)); 998 __ popl (Address(rsp, dst_offset)); 999 __ pushl(Address(rsp, src_offset+4)); 1000 __ popl (Address(rsp, dst_offset+4)); 1001 break; 1002 case Op_VecX: 1003 __ movdqu(Address(rsp, -16), xmm0); 1004 __ movdqu(xmm0, Address(rsp, src_offset)); 1005 __ movdqu(Address(rsp, dst_offset), xmm0); 1006 __ movdqu(xmm0, Address(rsp, -16)); 1007 break; 1008 case Op_VecY: 1009 __ vmovdqu(Address(rsp, -32), xmm0); 1010 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1011 __ vmovdqu(Address(rsp, dst_offset), xmm0); 1012 __ vmovdqu(xmm0, Address(rsp, -32)); 1013 break; 1014 case Op_VecZ: 1015 __ evmovdquq(Address(rsp, -64), xmm0, 2); 1016 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 1017 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 1018 __ evmovdquq(xmm0, Address(rsp, -64), 2); 1019 break; 1020 default: 1021 ShouldNotReachHere(); 1022 } 1023 int size = __ offset() - offset; 1024 assert(size == calc_size, "incorrect size calculation"); 1025 return size; 1026 #ifndef PRODUCT 1027 } else if (!do_size) { 1028 switch (ireg) { 1029 case Op_VecS: 1030 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1031 "popl [rsp + #%d]", 1032 src_offset, dst_offset); 1033 break; 1034 case Op_VecD: 1035 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1036 "popq [rsp + #%d]\n\t" 1037 "pushl [rsp + #%d]\n\t" 1038 "popq [rsp + #%d]", 1039 src_offset, dst_offset, src_offset+4, dst_offset+4); 1040 break; 1041 case Op_VecX: 1042 st->print("movdqu [rsp - #16], xmm0\t# 
128-bit mem-mem spill\n\t" 1043 "movdqu xmm0, [rsp + #%d]\n\t" 1044 "movdqu [rsp + #%d], xmm0\n\t" 1045 "movdqu xmm0, [rsp - #16]", 1046 src_offset, dst_offset); 1047 break; 1048 case Op_VecY: 1049 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1050 "vmovdqu xmm0, [rsp + #%d]\n\t" 1051 "vmovdqu [rsp + #%d], xmm0\n\t" 1052 "vmovdqu xmm0, [rsp - #32]", 1053 src_offset, dst_offset); 1054 break; 1055 case Op_VecZ: 1056 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1057 "vmovdqu xmm0, [rsp + #%d]\n\t" 1058 "vmovdqu [rsp + #%d], xmm0\n\t" 1059 "vmovdqu xmm0, [rsp - #64]", 1060 src_offset, dst_offset); 1061 break; 1062 default: 1063 ShouldNotReachHere(); 1064 } 1065 #endif 1066 } 1067 return calc_size; 1068 } 1069 1070 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1071 // Get registers to move 1072 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1073 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1074 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1075 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1076 1077 enum RC src_second_rc = rc_class(src_second); 1078 enum RC src_first_rc = rc_class(src_first); 1079 enum RC dst_second_rc = rc_class(dst_second); 1080 enum RC dst_first_rc = rc_class(dst_first); 1081 1082 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1083 1084 // Generate spill code! 
1085 int size = 0; 1086 1087 if( src_first == dst_first && src_second == dst_second ) 1088 return size; // Self copy, no move 1089 1090 if (bottom_type()->isa_vect() != NULL) { 1091 uint ireg = ideal_reg(); 1092 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1093 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1094 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1095 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1096 // mem -> mem 1097 int src_offset = ra_->reg2offset(src_first); 1098 int dst_offset = ra_->reg2offset(dst_first); 1099 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1100 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1101 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1102 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1103 int stack_offset = ra_->reg2offset(dst_first); 1104 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1105 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1106 int stack_offset = ra_->reg2offset(src_first); 1107 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1108 } else { 1109 ShouldNotReachHere(); 1110 } 1111 } 1112 1113 // -------------------------------------- 1114 // Check for mem-mem move. push/pop to move. 
1115 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1116 if( src_second == dst_first ) { // overlapping stack copy ranges 1117 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1118 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1119 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1120 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1121 } 1122 // move low bits 1123 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1124 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1125 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1126 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1127 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1128 } 1129 return size; 1130 } 1131 1132 // -------------------------------------- 1133 // Check for integer reg-reg copy 1134 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1135 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1136 1137 // Check for integer store 1138 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1139 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1140 1141 // Check for integer load 1142 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1143 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1144 1145 // Check for integer reg-xmm reg copy 1146 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1147 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1148 "no 64 bit integer-float reg moves" ); 1149 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1150 } 1151 // -------------------------------------- 1152 // Check for float reg-reg copy 1153 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1154 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1155 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1156 if( cbuf ) { 1157 1158 // Note the mucking with the register encode to compensate for the 0/1 1159 // indexing issue mentioned in a comment in the reg_def sections 1160 // for FPR registers many lines above here. 1161 1162 if( src_first != FPR1L_num ) { 1163 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1164 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1165 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1166 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1167 } else { 1168 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1169 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1170 } 1171 #ifndef PRODUCT 1172 } else if( !do_size ) { 1173 if( size != 0 ) st->print("\n\t"); 1174 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1175 else st->print( "FST %s", Matcher::regName[dst_first]); 1176 #endif 1177 } 1178 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1179 } 1180 1181 // Check for float store 1182 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1183 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1184 } 1185 1186 // Check for float load 1187 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1188 int offset = ra_->reg2offset(src_first); 1189 const char *op_str; 1190 int op; 1191 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1192 op_str = "FLD_D"; 1193 op = 0xDD; 1194 } else { // 32-bit load 1195 op_str = "FLD_S"; 1196 op = 0xD9; 1197 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1198 } 1199 if( cbuf ) { 1200 emit_opcode (*cbuf, op ); 1201 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1202 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1203 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1204 #ifndef PRODUCT 1205 } else if( !do_size ) { 1206 if( size != 0 ) st->print("\n\t"); 1207 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1208 #endif 1209 } 1210 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1211 return size + 3+offset_size+2; 1212 } 1213 1214 // Check for xmm reg-reg copy 1215 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1216 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1217 (src_first+1 == src_second && dst_first+1 == dst_second), 1218 "no non-adjacent float-moves" ); 1219 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1220 } 1221 1222 // Check for xmm reg-integer reg copy 1223 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1224 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1225 "no 64 bit float-integer reg moves" ); 1226 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1227 } 1228 1229 // Check for xmm store 1230 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1231 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1232 } 1233 1234 // Check for float xmm load 1235 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1236 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1237 } 1238 1239 // Copy from float reg to xmm reg 1240 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1241 
// copy to the top of stack from floating point reg 1242 // and use LEA to preserve flags 1243 if( cbuf ) { 1244 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1245 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1246 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1247 emit_d8(*cbuf,0xF8); 1248 #ifndef PRODUCT 1249 } else if( !do_size ) { 1250 if( size != 0 ) st->print("\n\t"); 1251 st->print("LEA ESP,[ESP-8]"); 1252 #endif 1253 } 1254 size += 4; 1255 1256 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1257 1258 // Copy from the temp memory to the xmm reg. 1259 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1260 1261 if( cbuf ) { 1262 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1263 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1264 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1265 emit_d8(*cbuf,0x08); 1266 #ifndef PRODUCT 1267 } else if( !do_size ) { 1268 if( size != 0 ) st->print("\n\t"); 1269 st->print("LEA ESP,[ESP+8]"); 1270 #endif 1271 } 1272 size += 4; 1273 return size; 1274 } 1275 1276 assert( size > 0, "missed a case" ); 1277 1278 // -------------------------------------------------------------------- 1279 // Check for second bits still needing moving. 
1280 if( src_second == dst_second ) 1281 return size; // Self copy; no move 1282 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1283 1284 // Check for second word int-int move 1285 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1286 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1287 1288 // Check for second word integer store 1289 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1290 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1291 1292 // Check for second word integer load 1293 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1294 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1295 1296 1297 Unimplemented(); 1298 return 0; // Mute compiler 1299 } 1300 1301 #ifndef PRODUCT 1302 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1303 implementation( NULL, ra_, false, st ); 1304 } 1305 #endif 1306 1307 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1308 implementation( &cbuf, ra_, false, NULL ); 1309 } 1310 1311 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1312 return implementation( NULL, ra_, true, NULL ); 1313 } 1314 1315 1316 //============================================================================= 1317 #ifndef PRODUCT 1318 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1319 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1320 int reg = ra_->get_reg_first(this); 1321 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1322 } 1323 #endif 1324 1325 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1326 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1327 int reg = ra_->get_encode(this); 1328 if( offset >= 128 ) { 1329 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1330 emit_rm(cbuf, 0x2, reg, 
0x04); 1331 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1332 emit_d32(cbuf, offset); 1333 } 1334 else { 1335 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1336 emit_rm(cbuf, 0x1, reg, 0x04); 1337 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1338 emit_d8(cbuf, offset); 1339 } 1340 } 1341 1342 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1343 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1344 if( offset >= 128 ) { 1345 return 7; 1346 } 1347 else { 1348 return 4; 1349 } 1350 } 1351 1352 //============================================================================= 1353 #ifndef PRODUCT 1354 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1355 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1356 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1357 st->print_cr("\tNOP"); 1358 st->print_cr("\tNOP"); 1359 if( !OptoBreakpoint ) 1360 st->print_cr("\tNOP"); 1361 } 1362 #endif 1363 1364 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1365 MacroAssembler masm(&cbuf); 1366 #ifdef ASSERT 1367 uint insts_size = cbuf.insts_size(); 1368 #endif 1369 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1370 masm.jump_cc(Assembler::notEqual, 1371 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1372 /* WARNING these NOPs are critical so that verified entry point is properly 1373 aligned for patching by NativeJump::patch_verified_entry() */ 1374 int nops_cnt = 2; 1375 if( !OptoBreakpoint ) // Leave space for int3 1376 nops_cnt += 1; 1377 masm.nop(nops_cnt); 1378 1379 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1380 } 1381 1382 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1383 return OptoBreakpoint ? 
11 : 12; 1384 } 1385 1386 1387 //============================================================================= 1388 1389 int Matcher::regnum_to_fpu_offset(int regnum) { 1390 return regnum - 32; // The FP registers are in the second chunk 1391 } 1392 1393 // This is UltraSparc specific, true just means we have fast l2f conversion 1394 const bool Matcher::convL2FSupported(void) { 1395 return true; 1396 } 1397 1398 // Is this branch offset short enough that a short branch can be used? 1399 // 1400 // NOTE: If the platform does not provide any short branch variants, then 1401 // this method should return false for offset 0. 1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1403 // The passed offset is relative to address of the branch. 1404 // On 86 a branch displacement is calculated relative to address 1405 // of a next instruction. 1406 offset -= br_size; 1407 1408 // the short version of jmpConUCF2 contains multiple branches, 1409 // making the reach slightly less 1410 if (rule == jmpConUCF2_rule) 1411 return (-126 <= offset && offset <= 125); 1412 return (-128 <= offset && offset <= 127); 1413 } 1414 1415 const bool Matcher::isSimpleConstant64(jlong value) { 1416 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1417 return false; 1418 } 1419 1420 // The ecx parameter to rep stos for the ClearArray node is in dwords. 1421 const bool Matcher::init_array_count_is_in_bytes = false; 1422 1423 // Needs 2 CMOV's for longs. 1424 const int Matcher::long_cmove_cost() { return 1; } 1425 1426 // No CMOVF/CMOVD with SSE/SSE2 1427 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1428 1429 // Does the CPU require late expand (see block.cpp for description of late expand)? 1430 const bool Matcher::require_postalloc_expand = false; 1431 1432 // Do we need to mask the count passed to shift instructions or does 1433 // the cpu only look at the lower 5/6 bits anyway? 
const bool Matcher::need_masked_shift_count = false;

// The next four queries are not expected to be called on this port: each one
// calls ShouldNotCallThis() first, and the 'return true' only satisfies the
// compiler.
// NOTE(review): presumably because compressed oops/klass pointers are not
// used by the 32-bit VM - confirm.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_oop_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_klass_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Replace the memory operand of 'node' that covers input edge 'idx' with its
// "*_win95_safe" counterpart. Operand kinds listed as needing no
// transformation are left untouched; an unknown operand kind asserts.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds(); // Virtual call for number of operands
  uint skipped = node->oper_input_base(); // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1; // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Walk the operands until the one whose edge range contains 'idx'.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++; // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

// On this port the spillable argument registers are exactly the Java
// argument registers.
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
         (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// Recognized cases: an AndL whose second input is a constant with zero high
// half, or a ConL with zero high half.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams. Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword. There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
// operand to generate a function which returns its register number when
// queried. CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
MEMORY_INTER causes an 1608 // operand to generate four functions which return the Base Register, the 1609 // Index Register, the Scale Value, and the Offset Value of the operand when 1610 // queried. COND_INTER causes an operand to generate six functions which 1611 // return the encoding code (ie - encoding bits for the instruction) 1612 // associated with each basic boolean condition for a conditional instruction. 1613 // Instructions specify two basic values for encoding. They use the 1614 // ins_encode keyword to specify their encoding class (which must be one of 1615 // the class names specified in the encoding block), and they use the 1616 // opcode keyword to specify, in order, their primary, secondary, and 1617 // tertiary opcode. Only the opcode sections which a particular instruction 1618 // needs for encoding need to be specified. 1619 encode %{ 1620 // Build emit functions for each basic byte or larger field in the intel 1621 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1622 // code in the enc_class source block. Emit functions will live in the 1623 // main source block for now. 
// In future, we can generalize this by
// adding a syntax that specifies the sizes of fields in an order,
// so that the adlc can build the emit functions automagically

// Emit primary opcode
enc_class OpcP %{
  emit_opcode(cbuf, $primary);
%}

// Emit secondary opcode
enc_class OpcS %{
  emit_opcode(cbuf, $secondary);
%}

// Emit opcode directly
enc_class Opcode(immI d8) %{
  emit_opcode(cbuf, $d8$$constant);
%}

// Emit the single byte 0x66 ahead of the instruction that follows
// (enc_cmpxchgw in this file uses the same byte for "16-bit mode").
enc_class SizePrefix %{
  emit_opcode(cbuf,0x66);
%}

// Emit a register-direct (mod == 0x3) R/M byte: reg field = dst, r/m field = src.
// NOTE(review): an enc_class with this same name and body is defined a second
// time later in this encode block (after enc_CopyL_Lo) -- confirm whether the
// duplicate is intentional.
enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// Emit the given opcode byte followed by a register-direct R/M byte.
enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
  emit_opcode(cbuf,$opcode$$constant);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// Load the constant zero into dst using the 5-byte MOV r32,imm32 form.
enc_class mov_r32_imm0( rRegI dst ) %{
  emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
  emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
%}

enc_class cdq_enc %{
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  //
  //         normal case                           special case
  //
  // input : rax,: dividend                         min_int
  //         reg: divisor                          -1
  //
  // output: rax,: quotient  (= rax, idiv reg)       min_int
  //         rdx: remainder (= rax, irem reg)       0
  //
  //  Code sequence:
  //
  //  81 F8 00 00 00 80    cmp         rax,80000000h
  //  0F 85 0B 00 00 00    jne         normal_case
  //  33 D2                xor         rdx,edx
  //  83 F9 FF             cmp         rcx,0FFh      (imm8 0xFF, i.e. the divisor -1 case above)
  //  0F 84 03 00 00 00    je          done
  //  normal_case:
  //  99                   cdq
  //  F7 F9                idiv        rax,ecx
  //  done:
  //
  // The byte sequence below is fixed: the divisor register and both branch
  // displacements are hard-coded.  0x0B skips xor (2) + cmp (3) + je (6);
  // 0x03 skips cdq (1) plus the 2-byte idiv that the user of this rule emits.
  emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
  emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
  emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
  emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
  emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
  emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
  emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
  emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
  emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
  emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
  emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
  // normal_case:
  emit_opcode(cbuf,0x99);                                         // cdq
  // idiv (note: must be emitted by the user of this rule)
  // done: (the je above lands here, i.e. after the idiv emitted by the user)
%}

// Dense encoding for older common ops
enc_class Opc_plus(immI opcode, rRegI reg) %{
  emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
%}


// Opcode enc_class for 8/32 bit immediate instructions with sign-extension
enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
  // Check for 8-bit immediate, and set sign extend bit in opcode
  if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
    emit_opcode(cbuf, $primary | 0x02);
  }
  else {                          // If 32-bit immediate
    emit_opcode(cbuf, $primary);
  }
%}

enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
  // Emit primary opcode and set sign-extend bit
  // Check for 8-bit immediate, and set sign extend bit in opcode
  if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
    emit_opcode(cbuf, $primary | 0x02);
  }
  else {                          // If 32-bit immediate
    emit_opcode(cbuf, $primary);
  }
  // Emit r/m byte with secondary opcode, after primary opcode.
  emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
%}

enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
  // Emit only the immediate (no opcode byte is written here): one byte when
  // the constant fits in a signed 8-bit value, otherwise four bytes.
  if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
    $$$emit8$imm$$constant;
  }
  else {                          // If 32-bit immediate
    // Output immediate
    $$$emit32$imm$$constant;
  }
%}

// Low-word half of a long op with an immediate: opcode, R/M byte and immediate
// are all chosen from the low 32 bits of the constant.
enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
  // Emit primary opcode and set sign-extend bit
  // Check for 8-bit immediate, and set sign extend bit in opcode
  int con = (int)$imm$$constant; // Throw away top bits
  emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
  // Emit r/m byte with secondary opcode, after primary opcode.
  emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
  else                               emit_d32(cbuf,con);
%}

// High-word half of a long op with an immediate: same scheme as
// Long_OpcSErm_Lo but using the upper 32 bits of the constant, the tertiary
// opcode in the R/M byte and the high register of the pair.
enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
  // Emit primary opcode and set sign-extend bit
  // Check for 8-bit immediate, and set sign extend bit in opcode
  int con = (int)($imm$$constant >> 32); // Throw away bottom bits
  emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
  // Emit r/m byte with tertiary opcode, after primary opcode.
  emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
  if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
  else                               emit_d32(cbuf,con);
%}

// Emit the secondary opcode combined with the register number via emit_cc.
enc_class OpcSReg (rRegI dst) %{    // BSWAP
  emit_cc(cbuf, $secondary, $dst$$reg );
%}

// Byte-swap a 64-bit value held in a register pair: BSWAP each half, then
// exchange the two halves.
enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
  int destlo = $dst$$reg;
  int desthi = HIGH_FROM_LOW(destlo);
  // bswap lo
  emit_opcode(cbuf, 0x0F);
  emit_cc(cbuf, 0xC8, destlo);
  // bswap hi
  emit_opcode(cbuf, 0x0F);
  emit_cc(cbuf, 0xC8, desthi);
  // xchg lo and hi
  emit_opcode(cbuf, 0x87);
  emit_rm(cbuf, 0x3, destlo, desthi);
%}

// Emit a register-direct R/M byte whose reg field carries the secondary opcode.
enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
  emit_rm(cbuf, 0x3, $secondary, $div$$reg );
%}

// Emit the primary opcode byte, then the secondary opcode combined with the
// condition code of the compare operand.
enc_class enc_cmov(cmpOp cop ) %{ // CMOV
  $$$emit8$primary;
  emit_cc(cbuf, $secondary, $cop$$cmpcode);
%}

// Build a two-byte opcode from 0xDA00, the condition code and (src - 1),
// and emit it high byte first.
enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
  int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
  emit_d8(cbuf, op >> 8 );
  emit_d8(cbuf, op & 255);
%}

// emulate a CMOV with a conditional branch around a MOV
enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
  // Invert sense of branch from sense of CMOV
  emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
  emit_d8( cbuf, $brOffs$$constant );
%}

// Emit the slow-path subtype check using fixed registers.  When $primary is
// non-zero the result register is cleared on the fall-through path; the
// `miss` label is bound after that clear, so a jump to `miss` skips it.
enc_class enc_PartialSubtypeCheck( ) %{
  Register Redi = as_Register(EDI_enc); // result register
  Register Reax = as_Register(EAX_enc); // super class
  Register Recx = as_Register(ECX_enc); // killed
  Register Resi = as_Register(ESI_enc); // sub class
  Label miss;

  MacroAssembler _masm(&cbuf);
  __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                   NULL, &miss,
                                   /*set_cond_codes:*/ true);
  if ($primary) {
    __ xorptr(Redi, Redi);
  }
  __ bind(miss);
%}

// Clean (or, with SSE2+ and VerifyFPU, verify) the FPU stack.  The size of
// the emitted code is recorded in sizeof_FFree_Float_Stack_All the first time
// (while it is still -1) and asserted to be unchanged on later emissions.
enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
  MacroAssembler masm(&cbuf);
  int start = masm.offset();
  if (UseSSE >= 2) {
    if (VerifyFPU) {
      masm.verify_FPU(0, "must be empty in SSE2+ mode");
    }
  } else {
    // External c_calling_convention expects the FPU stack to be 'clean'.
    // Compiled code leaves it dirty.  Do cleanup now.
    masm.empty_FPU_stack();
  }
  if (sizeof_FFree_Float_Stack_All == -1) {
    sizeof_FFree_Float_Stack_All = masm.offset() - start;
  } else {
    assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
  }
%}

// Emit an FPU verification call after a runtime leaf call; emits nothing
// unless VerifyFPU is set.
enc_class Verify_FPU_For_Leaf %{
  if( VerifyFPU ) {
    MacroAssembler masm(&cbuf);
    masm.verify_FPU( -3, "Returning from Runtime Leaf call");
  }
%}

// Emit a direct call to a runtime entry point with a runtime-call relocation.
// With UseSSE >= 2, a float/double result is afterwards either dropped from
// the FPU stack (when unused) or moved into xmm0 through a temporary stack
// slot.
enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
  // This is the instruction starting address for relocation info.
  cbuf.set_insts_mark();
  $$$emit8$primary;
  // CALL directly to the runtime
  emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
              runtime_call_Relocation::spec(), RELOC_IMM32 );

  if (UseSSE >= 2) {
    MacroAssembler _masm(&cbuf);
    BasicType rt = tf()->return_type();

    if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
      // A C runtime call where the return value is unused.  In SSE2+
      // mode the result needs to be removed from the FPU stack.  It's
      // likely that this function call could be removed by the
      // optimizer if the C function is a pure function.
      __ ffree(0);
    } else if (rt == T_FLOAT) {
      // Make a 4-byte temp on the stack, pop the FPU result into it,
      // load it into xmm0, then release the temp.
      __ lea(rsp, Address(rsp, -4));
      __ fstp_s(Address(rsp, 0));
      __ movflt(xmm0, Address(rsp, 0));
      __ lea(rsp, Address(rsp,  4));
    } else if (rt == T_DOUBLE) {
      // Same as the float case with an 8-byte temp.
      __ lea(rsp, Address(rsp, -8));
      __ fstp_d(Address(rsp, 0));
      __ movdbl(xmm0, Address(rsp, 0));
      __ lea(rsp, Address(rsp,  8));
    }
  }
%}

// Code emitted ahead of a call.  The emitted size is asserted to equal
// pre_call_resets_size().
enc_class pre_call_resets %{
  // If method sets FPU control word restore it here
  debug_only(int off0 = cbuf.insts_size());
  if (ra_->C->in_24_bit_fp_mode()) {
    MacroAssembler _masm(&cbuf);
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }
  // Clear upper bits of YMM registers when current compiled code uses
  // wide vectors to avoid AVX <-> SSE transition penalty during call.
  MacroAssembler _masm(&cbuf);
  __ vzeroupper();
  debug_only(int off1 = cbuf.insts_size());
  assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
%}

enc_class post_call_FPU %{
  // If method sets FPU control word do it here also
  if (Compile::current()->in_24_bit_fp_mode()) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
  }
%}

enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
  // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
  // who we intended to call.
  cbuf.set_insts_mark();
  $$$emit8$primary;

  if (!_method) {
    // No method attached: emit as a plain runtime call.
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                   runtime_call_Relocation::spec(),
                   RELOC_IMM32);
  } else {
    // Pick the relocation kind from _optimized_virtual, then emit the
    // to-interpreter stub that accompanies the call.
    int method_index = resolved_method_index(cbuf);
    RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                : static_call_Relocation::spec(method_index);
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                   rspec, RELOC_DISP32);
    // Emit stubs for static call.
    address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
    if (stub == NULL) {
      // Stub could not be emitted: record the failure and stop emitting.
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  }
%}

// Emit an inline-cache call through the macro assembler.
enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
  MacroAssembler _masm(&cbuf);
  __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
%}

// Emit an indirect call through [EAX + disp8]; the displacement must fit in a
// signed byte because the mod == 0x01 (8-bit displacement) form is used.
enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
  int disp = in_bytes(Method::from_compiled_offset());
  assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

  // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
  cbuf.set_insts_mark();
  $$$emit8$primary;
  emit_rm(cbuf, 0x01, $secondary, EAX_enc );   // R/M byte
  emit_d8(cbuf, disp);             // Displacement

%}

// Following encoding is no longer used, but may be restored if calling
// convention changes significantly.
// Became: Xor_Reg(EBP), Java_To_Runtime( labl )
//
// enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
//   // int ic_reg     = Matcher::inline_cache_reg();
//   // int ic_encode  = Matcher::_regEncode[ic_reg];
//   // int imo_reg    = Matcher::interpreter_method_oop_reg();
//   // int imo_encode = Matcher::_regEncode[imo_reg];
//
//   // // Interpreter expects method_oop in EBX, currently a callee-saved register,
//   // // so we load it immediately before the call
//   // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
//   // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
//
//   // xor rbp,ebp
//   emit_opcode(cbuf, 0x33);
//   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
//
//   // CALL to interpreter.
//   cbuf.set_insts_mark();
//   $$$emit8$primary;
//   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
//               runtime_call_Relocation::spec(), RELOC_IMM32 );
// %}

// Emit primary opcode, a register-direct R/M byte carrying the secondary
// opcode, and an 8-bit immediate (the shift count).
enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  $$$emit8$shift$$constant;
%}

enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  emit_opcode(cbuf, 0xB8 + $dst$$reg);
  $$$emit32$src$$constant;
%}

// Same shape as LdImmI, but the base opcode comes from $primary instead of
// the literal 0xB8.
enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  emit_opcode(cbuf, $primary + $dst$$reg);
  $$$emit32$src$$constant;
%}

// Load the low 32 bits of a long constant into the low register of the pair.
// A zero half is materialized with the 2-byte XOR reg,reg instead of a
// 5-byte load.
enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  int dst_enc = $dst$$reg;
  int src_con = $src$$constant & 0x0FFFFFFFFL;
  if (src_con == 0) {
    // xor dst, dst
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, dst_enc, dst_enc);
  } else {
    emit_opcode(cbuf, $primary + dst_enc);
    emit_d32(cbuf, src_con);
  }
%}

// Load the high 32 bits of a long constant into the high register of the pair.
// NOTE(review): the high register is computed here as "$dst$$reg + 2" while
// the other enc_classes in this block use HIGH_FROM_LOW() -- presumably
// equivalent; confirm against the macro definition.
enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  int dst_enc = $dst$$reg + 2;
  int src_con = ((julong)($src$$constant)) >> 32;
  if (src_con == 0) {
    // xor dst, dst
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, dst_enc, dst_enc);
  } else {
    emit_opcode(cbuf, $primary + dst_enc);
    emit_d32(cbuf, src_con);
  }
%}


// Encode a reg-reg copy.  If it is useless, then empty encoding.
enc_class enc_Copy( rRegI dst, rRegI src ) %{
  encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}

// Copy from the register that $src$$reg names for a long pair (the low half,
// going by this enc_class's name) into an int register.
enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
  encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}

// Emit a register-direct (mod == 0x3) R/M byte: reg field = dst, r/m field = src.
// NOTE(review): this is the second definition of RegReg in this encode block;
// an identical one appears earlier (after SizePrefix) -- confirm whether the
// duplicate is intentional.
enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// Primary opcode + R/M byte for the low registers of two long pairs.
enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// Secondary opcode + R/M byte for the high registers of two long pairs.
enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
  $$$emit8$secondary;
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
%}

// R/M byte only (no opcode) for the low registers of two long pairs.
enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// R/M byte only (no opcode) for the high registers of two long pairs.
enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
%}

// R/M byte pairing an int register (reg field) with the high register of a
// long pair (r/m field).
enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
%}

enc_class Con32 (immI src) %{    // Con32(storeImmI)
  // Output immediate
  $$$emit32$src$$constant;
%}

enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
  // Output Float immediate bits
  jfloat jf = $src$$constant;
  int    jf_as_bits = jint_cast( jf );
  emit_d32(cbuf, jf_as_bits);
%}

enc_class Con32F_as_bits(immF src) %{      // storeX_imm
  // Output Float immediate bits
  jfloat jf = $src$$constant;
  int    jf_as_bits = jint_cast( jf );
  emit_d32(cbuf, jf_as_bits);
%}

enc_class Con16 (immI src) %{    // Con16(storeImmI)
  // Output immediate
  $$$emit16$src$$constant;
%}

// Emit the constant as a 32-bit value via emit_d32.
enc_class Con_d32(immI src) %{
  emit_d32(cbuf,$src$$constant);
%}

// NOTE(review): the trailing "Con32(storeImmI)" tag below looks copied from
// Con32 and does not describe this enc_class.
enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
  // Output immediate memory reference
  emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
  emit_d32(cbuf, 0x00);
%}

enc_class lock_prefix( ) %{
  emit_opcode(cbuf,0xF0);         // [Lock]
%}

// Cmp-xchg long value.
// Note: we need to swap rbx, and rcx before and after the
//       cmpxchg8 instruction because the instruction uses
//       rcx as the high order word of the new value to store but
//       our register encoding uses rbx,.
// Emits: XCHG, LOCK prefix, 0F C7 with R/M reg field 1 addressing [mem_ptr],
// then the same XCHG again to restore the two registers.
enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
  // [Lock]
  emit_opcode(cbuf,0xF0);
  // CMPXCHG8 [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xC7);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
%}

// Locked 32-bit compare-and-exchange on [mem_ptr]: LOCK prefix + 0F B1.
enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // CMPXCHG [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// Byte variant of enc_cmpxchg: identical except for opcode 0F B0.
enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // CMPXCHGB [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB0);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// 16-bit variant of enc_cmpxchg: same opcode 0F B1, with a 0x66 prefix
// emitted between the LOCK prefix and the opcode.
enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // 16-bit mode
  emit_opcode(cbuf, 0x66);

  // CMPXCHGW [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// Turn the current flags into 0/1 in res: res = 0, and res = 1 is executed
// only when the JNE is not taken.  The JNE displacement of 5 is the size of
// the second MOV (1 opcode byte + 4 immediate bytes).
enc_class enc_flags_ne_to_boolean( iRegI res ) %{
  int res_encoding = $res$$reg;

  // MOV  res,0
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 0 );
  // JNE,s  fail
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 5 );
  // MOV  res,1
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 1 );
  // fail:
%}

enc_class set_instruction_start( ) %{
  cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
%}

// Emit the R/M (plus SIB/displacement as needed) bytes for a register and a
// memory operand, passing the operand's displacement relocation through to
// encode_RegMem.
enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = $ereg$$reg;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc();
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// RegMem for the high half of a long: uses the high register of the pair and
// addresses memory 4 bytes further on.  Asserts that the displacement carries
// no relocation, since 4 is added to it.
enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp + 4;      // Offset is 4 further in memory
  assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
%}

// Long shift by a constant in 1..31 (per the operand type): a two-byte
// 0F <tertiary> instruction across both halves, followed by a
// <primary> /<secondary> shift of r1, both by cnt.  Which half is r1 is
// decided by whether $tertiary is 0xA4.
enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
  int r1, r2;
  if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,$tertiary);
  emit_rm(cbuf, 0x3, r1, r2);
  emit_d8(cbuf,$cnt$$constant);
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, r1);
  emit_d8(cbuf,$cnt$$constant);
%}

// Long shift by a constant in 32..63 (per the operand type): copy the high
// half into the low half, shift the low half by (cnt - 32) when that is
// non-zero, then shift the high half by 31.
enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
  emit_opcode( cbuf, 0x8B ); // Move
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
  emit_d8(cbuf,31);
%}

// Long shift by a constant in 32..63 (per the operand type) that clears the
// vacated half: move r2 into r1, shift r1 by (cnt - 32) when that is
// non-zero, then zero r2.  Which half is r1 is decided by whether $secondary
// is 0x5.
enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
  int r1, r2;
  if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

  emit_opcode( cbuf, 0x8B ); // Move r1,r2
  emit_rm(cbuf, 0x3, r1, r2);
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_opcode(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_opcode(cbuf,0x33);  // XOR r2,r2
  emit_rm(cbuf, 0x3, r2, r2);
%}

// Clone of RegMem but accepts an extra parameter to access each
// half of a double in memory; it never needs relocation info.
enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
  emit_opcode(cbuf,$opcode$$constant);
  int reg_encoding = $rm_reg$$reg;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp + $disp_for_half$$constant;
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
//
// Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
// and it never needs relocation information.
// Frequently used to move data between FPU's Stack Top and memory.
enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
%}

// Like RMopc_Mem_no_oop, but forwards the memory operand's displacement
// relocation to encode_RegMem instead of asserting that there is none.
enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

// Encode a [src0 + src1] address (base register plus constant displacement,
// no index, no scale) with dst in the reg field.
enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
  int reg_encoding = $dst$$reg;
  int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
  int index        = 0x04;            // 0x04 indicates no index
  int scale        = 0x00;            // 0x00 indicates no scale
  int displace     = $src1$$constant; // 0x00 indicates no displacement
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// dst = min(dst, src): compare, then skip the 2-byte move when dst < src.
enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst < src around move
  emit_opcode(cbuf,0x7C);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// dst = max(dst, src): compare, then skip the 2-byte move when dst > src.
enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst > src around move
  emit_opcode(cbuf,0x7F);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class enc_FPR_store(memory mem, regDPR src) %{
  // If src is FPR1, we can just FST to store it.
  // Else we need to FLD it to FPR1, then FSTP to store/pop it.
  int reg_encoding = 0x2; // Just store
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  if( $src$$reg != FPR1L_enc ) {
    reg_encoding = 0x3;  // Store & pop
    emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
  }
  cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
  emit_opcode(cbuf,$primary);
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class neg_reg(rRegI dst) %{
  // NEG $dst
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
%}

enc_class setLT_reg(eCXRegI dst) %{
  // SETLT $dst
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0x9C);
  emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
%}

// Branch-free sequence: p -= q; tmp = borrow ? -1 : 0 (SBB tmp,tmp);
// tmp &= y; p += tmp.
enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
  int tmpReg = $tmp$$reg;

  // SUB $p,$q
  emit_opcode(cbuf,0x2B);
  emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
  // SBB $tmp,$tmp
  emit_opcode(cbuf,0x1B);
  emit_rm(cbuf, 0x3, tmpReg, tmpReg);
  // AND $tmp,$y
  emit_opcode(cbuf,0x23);
  emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
  // ADD $p,$tmp
  emit_opcode(cbuf,0x03);
  emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
%}

// Variable long shift left.  The TEST is emitted against ECX_enc directly
// rather than $shift.  The JEQ displacement 0x04 skips the MOV (2 bytes) and
// the CLR (2 bytes).
enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV    $dst.hi,$dst.lo
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // CLR    $dst.lo
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
  // SHLD   $dst.hi,$dst.lo,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xA5);
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  // SHL    $dst.lo,$shift
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
%}

// Variable logical long shift right; mirror image of shift_left_long.
// The JEQ displacement 0x04 skips the MOV (2 bytes) and the CLR (2 bytes).
enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV    $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // CLR    $dst.hi
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
  // SHRD   $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SHR    $dst.hi,$shift
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
%}

// Variable arithmetic long shift right.  The JEQ displacement is 0x05 here
// because the skipped code is the MOV (2 bytes) plus SAR with an 8-bit
// immediate (3 bytes).
enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x05);
  // MOV    $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // SAR    $dst.hi,31
  emit_opcode(cbuf, 0xC1);
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
  emit_d8(cbuf, 0x1F );
// small:
  // SHRD   $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SAR    $dst.hi,$shift
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
%}


// ----------------- Encodings for floating point unit -----------------
// May leave result in FPU-TOS or FPU reg depending on opcodes
enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $src$$reg );
%}

// Pop argument in FPR0 with FSTP ST(0)
enc_class PopFPU() %{
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, 0xD8 );
%}

// !!!!! equivalent to Pop_Reg_F
enc_class Pop_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

// Push a copy of the given FPU register onto the FPU stack.
enc_class Push_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
%}

// Load the 80-bit constant at StubRoutines::addr_fpu_subnormal_bias1() and
// multiply it into dst, popping the loaded value.  The address is emitted as
// a raw 32-bit value (plain emit_d32, no relocation).
enc_class strictfp_bias1( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Same as strictfp_bias1 but with the constant at
// StubRoutines::addr_fpu_subnormal_bias2().
enc_class strictfp_bias2( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Special case for moving an integer register to a stack slot.
enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
%}

// Special case for moving a register to a stack slot.
enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  // Opcode already emitted
  emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
  emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
  emit_d32(cbuf, $dst$$disp);   // Displacement
%}

// Push the integer in stackSlot 'src' onto FP-stack
enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
  store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
%}

// Push FPU's TOS float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
  store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
%}

// Same as Pop_Mem_F except for opcode
// Push FPU's TOS double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
  store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
%}

// Store the FPU top-of-stack into the given FPU register and pop.
enc_class Pop_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

// Push a copy of the given FPU register onto the FPU stack.
enc_class Push_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
%}

// Push FPU's float to a stack-slot, and pop FPU-stack
// When src is FPR1 a plain store (/2) is used; otherwise src is first pushed
// and a store-and-pop (/3) is used.
enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
  int pop = 0x02;
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;
  }
  store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
%}

// Push FPU's double to a stack-slot, and pop FPU-stack
// Same scheme as Pop_Mem_Reg_FPR with opcode 0xDD.
enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
  int pop = 0x02;
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;
  }
  store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
%}

// Push FPU's double to a FPU-stack-slot, and pop FPU-stack
// When src is FPR1 no FLD is emitted and the non-popping form is used with
// the register index reduced by one; otherwise src is pushed and the popping
// form is used.
enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
  int pop = 0xD0 - 1; // -1 since we skip FLD
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0xD8;
  }
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
%}


// Push dst onto the FPU stack and, unless src is already FPR1, bring src
// into the slot just below it using fincstp / FXCH / fdecstp.
enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
  // load dst in FPR0
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // swap src with FPR1:
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
%}

// Reserve 8 bytes of stack and push src1, then src0, from XMM registers onto
// the FPU stack through that slot.  The 8 bytes stay reserved on exit.
enc_class Push_ModD_encoding(regD src0, regD src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_d(Address(rsp, 0));
  __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Float variant of Push_ModD_encoding using a 4-byte stack slot.
enc_class Push_ModF_encoding(regF src0, regF src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 4);
  __ movflt(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_s(Address(rsp, 0));
  __ movflt(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_s(Address(rsp, 0));
%}

// Pop the FPU top-of-stack into dst (an XMM register) through [rsp], then
// release 8 bytes of stack.
enc_class Push_ResultD(regD dst) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_d(Address(rsp, 0));
  __ movdbl($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, 8);
%}

// Float variant of Push_ResultD; the number of stack bytes released is given
// by the d8 constant.
enc_class Push_ResultF(regF dst, immI d8) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_s(Address(rsp, 0));
  __ movflt($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, $d8$$constant);
%}

// Reserve 8 bytes of stack and push src from an XMM register onto the FPU
// stack through that slot.  The 8 bytes stay reserved on exit.
enc_class Push_SrcD(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Reserve an 8-byte temporary on the stack.
enc_class push_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
%}

// Release the 8-byte temporary reserved by push_stack_temp_qword.
enc_class pop_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ addptr(rsp, 8);
%}

// Push src from an XMM register onto the FPU stack through [rsp].  Does not
// adjust rsp itself, so the slot at [rsp] must already be reserved.
enc_class push_xmm_to_fpr1(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Unless src is FPR1, emit the same fincstp / FXCH / fdecstp swap sequence
// that Push_Reg_Mod_DPR emits.
enc_class Push_Result_Mod_DPR( regDPR src) %{
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
  // // following asm replaced with Pop_Reg_F or Pop_Mem_F
  // // FSTP   FPR$dst$$reg
  // emit_opcode( cbuf, 0xDD );
  // emit_d8( cbuf, 0xD8+$dst$$reg );
%}

// Copy the FPU status word into the CPU flags (fnstsw ax; sahf), then emit a
// JNP with a fixed displacement of 5 bytes over whatever the user of this
// rule emits next.
enc_class fnstsw_sahf_skip_parity() %{
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jnp  ::skip
  emit_opcode( cbuf, 0x7B );
  emit_opcode( cbuf, 0x05 );
%}

enc_class emitModDPR() %{
  // fprem must be iterative
  // :: loop
  // fprem
  emit_opcode( cbuf, 0xD9 );
  emit_opcode( cbuf, 0xF8 );
  // wait
  emit_opcode( cbuf, 0x9b );
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jp  ::loop
  // (rel32 = 0xFFFFFFF4 = -12: back over the 12 bytes of this sequence,
  //  including the 6-byte jp itself)
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0x8A );
  emit_opcode( cbuf, 0xF4 );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
%}

enc_class fpu_flags() %{
  // fnstsw_ax
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // test ax,0x0400
  emit_opcode( cbuf,
0x66 ); // operand-size prefix for 16-bit immediate 2667 emit_opcode( cbuf, 0xA9 ); 2668 emit_d16 ( cbuf, 0x0400 ); 2669 // // // This sequence works, but stalls for 12-16 cycles on PPro 2670 // // test rax,0x0400 2671 // emit_opcode( cbuf, 0xA9 ); 2672 // emit_d32 ( cbuf, 0x00000400 ); 2673 // 2674 // jz exit (no unordered comparison) 2675 emit_opcode( cbuf, 0x74 ); 2676 emit_d8 ( cbuf, 0x02 ); 2677 // mov ah,1 - treat as LT case (set carry flag) 2678 emit_opcode( cbuf, 0xB4 ); 2679 emit_d8 ( cbuf, 0x01 ); 2680 // sahf 2681 emit_opcode( cbuf, 0x9E); 2682 %} 2683 2684 enc_class cmpF_P6_fixup() %{ 2685 // Fixup the integer flags in case comparison involved a NaN 2686 // 2687 // JNP exit (no unordered comparison, P-flag is set by NaN) 2688 emit_opcode( cbuf, 0x7B ); 2689 emit_d8 ( cbuf, 0x03 ); 2690 // MOV AH,1 - treat as LT case (set carry flag) 2691 emit_opcode( cbuf, 0xB4 ); 2692 emit_d8 ( cbuf, 0x01 ); 2693 // SAHF 2694 emit_opcode( cbuf, 0x9E); 2695 // NOP // target for branch to avoid branch to branch 2696 emit_opcode( cbuf, 0x90); 2697 %} 2698 2699 // fnstsw_ax(); 2700 // sahf(); 2701 // movl(dst, nan_result); 2702 // jcc(Assembler::parity, exit); 2703 // movl(dst, less_result); 2704 // jcc(Assembler::below, exit); 2705 // movl(dst, equal_result); 2706 // jcc(Assembler::equal, exit); 2707 // movl(dst, greater_result); 2708 2709 // less_result = 1; 2710 // greater_result = -1; 2711 // equal_result = 0; 2712 // nan_result = -1; 2713 2714 enc_class CmpF_Result(rRegI dst) %{ 2715 // fnstsw_ax(); 2716 emit_opcode( cbuf, 0xDF); 2717 emit_opcode( cbuf, 0xE0); 2718 // sahf 2719 emit_opcode( cbuf, 0x9E); 2720 // movl(dst, nan_result); 2721 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2722 emit_d32( cbuf, -1 ); 2723 // jcc(Assembler::parity, exit); 2724 emit_opcode( cbuf, 0x7A ); 2725 emit_d8 ( cbuf, 0x13 ); 2726 // movl(dst, less_result); 2727 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2728 emit_d32( cbuf, -1 ); 2729 // jcc(Assembler::below, exit); 2730 emit_opcode( cbuf, 0x72 ); 
2731 emit_d8 ( cbuf, 0x0C ); 2732 // movl(dst, equal_result); 2733 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2734 emit_d32( cbuf, 0 ); 2735 // jcc(Assembler::equal, exit); 2736 emit_opcode( cbuf, 0x74 ); 2737 emit_d8 ( cbuf, 0x05 ); 2738 // movl(dst, greater_result); 2739 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2740 emit_d32( cbuf, 1 ); 2741 %} 2742 2743 2744 // Compare the longs and set flags 2745 // BROKEN! Do Not use as-is 2746 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2747 // CMP $src1.hi,$src2.hi 2748 emit_opcode( cbuf, 0x3B ); 2749 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2750 // JNE,s done 2751 emit_opcode(cbuf,0x75); 2752 emit_d8(cbuf, 2 ); 2753 // CMP $src1.lo,$src2.lo 2754 emit_opcode( cbuf, 0x3B ); 2755 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2756 // done: 2757 %} 2758 2759 enc_class convert_int_long( regL dst, rRegI src ) %{ 2760 // mov $dst.lo,$src 2761 int dst_encoding = $dst$$reg; 2762 int src_encoding = $src$$reg; 2763 encode_Copy( cbuf, dst_encoding , src_encoding ); 2764 // mov $dst.hi,$src 2765 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2766 // sar $dst.hi,31 2767 emit_opcode( cbuf, 0xC1 ); 2768 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2769 emit_d8(cbuf, 0x1F ); 2770 %} 2771 2772 enc_class convert_long_double( eRegL src ) %{ 2773 // push $src.hi 2774 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2775 // push $src.lo 2776 emit_opcode(cbuf, 0x50+$src$$reg ); 2777 // fild 64-bits at [SP] 2778 emit_opcode(cbuf,0xdf); 2779 emit_d8(cbuf, 0x6C); 2780 emit_d8(cbuf, 0x24); 2781 emit_d8(cbuf, 0x00); 2782 // pop stack 2783 emit_opcode(cbuf, 0x83); // add SP, #8 2784 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2785 emit_d8(cbuf, 0x8); 2786 %} 2787 2788 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2789 // IMUL EDX:EAX,$src1 2790 emit_opcode( cbuf, 0xF7 ); 2791 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2792 // SAR 
EDX,$cnt-32 2793 int shift_count = ((int)$cnt$$constant) - 32; 2794 if (shift_count > 0) { 2795 emit_opcode(cbuf, 0xC1); 2796 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2797 emit_d8(cbuf, shift_count); 2798 } 2799 %} 2800 2801 // this version doesn't have add sp, 8 2802 enc_class convert_long_double2( eRegL src ) %{ 2803 // push $src.hi 2804 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2805 // push $src.lo 2806 emit_opcode(cbuf, 0x50+$src$$reg ); 2807 // fild 64-bits at [SP] 2808 emit_opcode(cbuf,0xdf); 2809 emit_d8(cbuf, 0x6C); 2810 emit_d8(cbuf, 0x24); 2811 emit_d8(cbuf, 0x00); 2812 %} 2813 2814 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2815 // Basic idea: long = (long)int * (long)int 2816 // IMUL EDX:EAX, src 2817 emit_opcode( cbuf, 0xF7 ); 2818 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2819 %} 2820 2821 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2822 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2823 // MUL EDX:EAX, src 2824 emit_opcode( cbuf, 0xF7 ); 2825 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2826 %} 2827 2828 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2829 // Basic idea: lo(result) = lo(x_lo * y_lo) 2830 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2831 // MOV $tmp,$src.lo 2832 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2833 // IMUL $tmp,EDX 2834 emit_opcode( cbuf, 0x0F ); 2835 emit_opcode( cbuf, 0xAF ); 2836 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2837 // MOV EDX,$src.hi 2838 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2839 // IMUL EDX,EAX 2840 emit_opcode( cbuf, 0x0F ); 2841 emit_opcode( cbuf, 0xAF ); 2842 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2843 // ADD $tmp,EDX 2844 emit_opcode( cbuf, 0x03 ); 2845 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2846 // MUL EDX:EAX,$src.lo 2847 emit_opcode( cbuf, 0xF7 ); 2848 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2849 // ADD EDX,ESI 2850 emit_opcode( 
cbuf, 0x03 ); 2851 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2852 %} 2853 2854 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2855 // Basic idea: lo(result) = lo(src * y_lo) 2856 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2857 // IMUL $tmp,EDX,$src 2858 emit_opcode( cbuf, 0x6B ); 2859 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2860 emit_d8( cbuf, (int)$src$$constant ); 2861 // MOV EDX,$src 2862 emit_opcode(cbuf, 0xB8 + EDX_enc); 2863 emit_d32( cbuf, (int)$src$$constant ); 2864 // MUL EDX:EAX,EDX 2865 emit_opcode( cbuf, 0xF7 ); 2866 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2867 // ADD EDX,ESI 2868 emit_opcode( cbuf, 0x03 ); 2869 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2870 %} 2871 2872 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2873 // PUSH src1.hi 2874 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2875 // PUSH src1.lo 2876 emit_opcode(cbuf, 0x50+$src1$$reg ); 2877 // PUSH src2.hi 2878 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2879 // PUSH src2.lo 2880 emit_opcode(cbuf, 0x50+$src2$$reg ); 2881 // CALL directly to the runtime 2882 cbuf.set_insts_mark(); 2883 emit_opcode(cbuf,0xE8); // Call into runtime 2884 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2885 // Restore stack 2886 emit_opcode(cbuf, 0x83); // add SP, #framesize 2887 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2888 emit_d8(cbuf, 4*4); 2889 %} 2890 2891 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2892 // PUSH src1.hi 2893 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2894 // PUSH src1.lo 2895 emit_opcode(cbuf, 0x50+$src1$$reg ); 2896 // PUSH src2.hi 2897 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2898 // PUSH src2.lo 2899 emit_opcode(cbuf, 0x50+$src2$$reg ); 2900 // CALL directly to the runtime 2901 cbuf.set_insts_mark(); 2902 emit_opcode(cbuf,0xE8); // Call into runtime 2903 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2904 // Restore stack 2905 emit_opcode(cbuf, 0x83); // add SP, #framesize 2906 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2907 emit_d8(cbuf, 4*4); 2908 %} 2909 2910 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2911 // MOV $tmp,$src.lo 2912 emit_opcode(cbuf, 0x8B); 2913 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2914 // OR $tmp,$src.hi 2915 emit_opcode(cbuf, 0x0B); 2916 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2917 %} 2918 2919 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2920 // CMP $src1.lo,$src2.lo 2921 emit_opcode( cbuf, 0x3B ); 2922 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2923 // JNE,s skip 2924 emit_cc(cbuf, 0x70, 0x5); 2925 emit_d8(cbuf,2); 2926 // CMP $src1.hi,$src2.hi 2927 emit_opcode( cbuf, 0x3B ); 2928 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2929 %} 2930 2931 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2932 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2933 emit_opcode( cbuf, 0x3B ); 2934 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2935 // MOV $tmp,$src1.hi 2936 emit_opcode( cbuf, 0x8B ); 2937 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2938 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2939 emit_opcode( cbuf, 0x1B ); 2940 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2941 %} 2942 2943 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2944 // XOR $tmp,$tmp 2945 emit_opcode(cbuf,0x33); // XOR 2946 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2947 // CMP $tmp,$src.lo 2948 emit_opcode( cbuf, 0x3B ); 2949 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2950 // SBB $tmp,$src.hi 2951 emit_opcode( cbuf, 0x1B ); 2952 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2953 %} 2954 2955 // Sniff, sniff... 
smells like Gnu Superoptimizer 2956 enc_class neg_long( eRegL dst ) %{ 2957 emit_opcode(cbuf,0xF7); // NEG hi 2958 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2959 emit_opcode(cbuf,0xF7); // NEG lo 2960 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2961 emit_opcode(cbuf,0x83); // SBB hi,0 2962 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2963 emit_d8 (cbuf,0 ); 2964 %} 2965 2966 enc_class enc_pop_rdx() %{ 2967 emit_opcode(cbuf,0x5A); 2968 %} 2969 2970 enc_class enc_rethrow() %{ 2971 cbuf.set_insts_mark(); 2972 emit_opcode(cbuf, 0xE9); // jmp entry 2973 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2974 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2975 %} 2976 2977 2978 // Convert a double to an int. Java semantics require we do complex 2979 // manglelations in the corner cases. So we set the rounding mode to 2980 // 'zero', store the darned double down as an int, and reset the 2981 // rounding mode to 'nearest'. The hardware throws an exception which 2982 // patches up the correct value directly to the stack. 2983 enc_class DPR2I_encoding( regDPR src ) %{ 2984 // Flip to round-to-zero mode. We attempted to allow invalid-op 2985 // exceptions here, so that a NAN or other corner-case value will 2986 // thrown an exception (but normal values get converted at full speed). 2987 // However, I2C adapters and other float-stack manglers leave pending 2988 // invalid-op exceptions hanging. We would have to clear them before 2989 // enabling them and that is more expensive than just testing for the 2990 // invalid value Intel stores down in the corner cases. 2991 emit_opcode(cbuf,0xD9); // FLDCW trunc 2992 emit_opcode(cbuf,0x2D); 2993 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 2994 // Allocate a word 2995 emit_opcode(cbuf,0x83); // SUB ESP,4 2996 emit_opcode(cbuf,0xEC); 2997 emit_d8(cbuf,0x04); 2998 // Encoding assumes a double has been pushed into FPR0. 
2999 // Store down the double as an int, popping the FPU stack 3000 emit_opcode(cbuf,0xDB); // FISTP [ESP] 3001 emit_opcode(cbuf,0x1C); 3002 emit_d8(cbuf,0x24); 3003 // Restore the rounding mode; mask the exception 3004 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3005 emit_opcode(cbuf,0x2D); 3006 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3007 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 3008 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3009 3010 // Load the converted int; adjust CPU stack 3011 emit_opcode(cbuf,0x58); // POP EAX 3012 emit_opcode(cbuf,0x3D); // CMP EAX,imm 3013 emit_d32 (cbuf,0x80000000); // 0x80000000 3014 emit_opcode(cbuf,0x75); // JNE around_slow_call 3015 emit_d8 (cbuf,0x07); // Size of slow_call 3016 // Push src onto stack slow-path 3017 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3018 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3019 // CALL directly to the runtime 3020 cbuf.set_insts_mark(); 3021 emit_opcode(cbuf,0xE8); // Call into runtime 3022 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3023 // Carry on here... 3024 %} 3025 3026 enc_class DPR2L_encoding( regDPR src ) %{ 3027 emit_opcode(cbuf,0xD9); // FLDCW trunc 3028 emit_opcode(cbuf,0x2D); 3029 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3030 // Allocate a word 3031 emit_opcode(cbuf,0x83); // SUB ESP,8 3032 emit_opcode(cbuf,0xEC); 3033 emit_d8(cbuf,0x08); 3034 // Encoding assumes a double has been pushed into FPR0. 3035 // Store down the double as a long, popping the FPU stack 3036 emit_opcode(cbuf,0xDF); // FISTP [ESP] 3037 emit_opcode(cbuf,0x3C); 3038 emit_d8(cbuf,0x24); 3039 // Restore the rounding mode; mask the exception 3040 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3041 emit_opcode(cbuf,0x2D); 3042 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3043 ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 3044 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3045 3046 // Load the converted int; adjust CPU stack 3047 emit_opcode(cbuf,0x58); // POP EAX 3048 emit_opcode(cbuf,0x5A); // POP EDX 3049 emit_opcode(cbuf,0x81); // CMP EDX,imm 3050 emit_d8 (cbuf,0xFA); // rdx 3051 emit_d32 (cbuf,0x80000000); // 0x80000000 3052 emit_opcode(cbuf,0x75); // JNE around_slow_call 3053 emit_d8 (cbuf,0x07+4); // Size of slow_call 3054 emit_opcode(cbuf,0x85); // TEST EAX,EAX 3055 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 3056 emit_opcode(cbuf,0x75); // JNE around_slow_call 3057 emit_d8 (cbuf,0x07); // Size of slow_call 3058 // Push src onto stack slow-path 3059 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3060 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3061 // CALL directly to the runtime 3062 cbuf.set_insts_mark(); 3063 emit_opcode(cbuf,0xE8); // Call into runtime 3064 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3065 // Carry on here... 
3066 %} 3067 3068 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 3069 // Operand was loaded from memory into fp ST (stack top) 3070 // FMUL ST,$src /* D8 C8+i */ 3071 emit_opcode(cbuf, 0xD8); 3072 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3073 %} 3074 3075 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3076 // FADDP ST,src2 /* D8 C0+i */ 3077 emit_opcode(cbuf, 0xD8); 3078 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3079 //could use FADDP src2,fpST /* DE C0+i */ 3080 %} 3081 3082 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3083 // FADDP src2,ST /* DE C0+i */ 3084 emit_opcode(cbuf, 0xDE); 3085 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3086 %} 3087 3088 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3089 // Operand has been loaded into fp ST (stack top) 3090 // FSUB ST,$src1 3091 emit_opcode(cbuf, 0xD8); 3092 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3093 3094 // FDIV 3095 emit_opcode(cbuf, 0xD8); 3096 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3097 %} 3098 3099 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3100 // Operand was loaded from memory into fp ST (stack top) 3101 // FADD ST,$src /* D8 C0+i */ 3102 emit_opcode(cbuf, 0xD8); 3103 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3104 3105 // FMUL ST,src2 /* D8 C*+i */ 3106 emit_opcode(cbuf, 0xD8); 3107 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3108 %} 3109 3110 3111 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3112 // Operand was loaded from memory into fp ST (stack top) 3113 // FADD ST,$src /* D8 C0+i */ 3114 emit_opcode(cbuf, 0xD8); 3115 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3116 3117 // FMULP src2,ST /* DE C8+i */ 3118 emit_opcode(cbuf, 0xDE); 3119 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3120 %} 3121 3122 // Atomically load the volatile long 3123 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3124 emit_opcode(cbuf,0xDF); 3125 int rm_byte_opcode = 0x05; 3126 int base = $mem$$base; 3127 int index = $mem$$index; 3128 int scale = $mem$$scale; 3129 int displace = $mem$$disp; 3130 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3131 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3132 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3133 %} 3134 3135 // Volatile Store Long. Must be atomic, so move it into 3136 // the FP TOS and then do a 64-bit FIST. Has to probe the 3137 // target address before the store (for null-ptr checks) 3138 // so the memory operand is used twice in the encoding. 3139 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3140 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3141 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3142 emit_opcode(cbuf,0xDF); 3143 int rm_byte_opcode = 0x07; 3144 int base = $mem$$base; 3145 int index = $mem$$index; 3146 int scale = $mem$$scale; 3147 int displace = $mem$$disp; 3148 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3149 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3150 %} 3151 3152 // Safepoint Poll. This polls the safepoint page, and causes an 3153 // exception if it is not readable. Unfortunately, it kills the condition code 3154 // in the process 3155 // We current use TESTL [spp],EDI 3156 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 3157 3158 enc_class Safepoint_Poll() %{ 3159 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); 3160 emit_opcode(cbuf,0x85); 3161 emit_rm (cbuf, 0x0, 0x7, 0x5); 3162 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 3163 %} 3164 %} 3165 3166 3167 //----------FRAME-------------------------------------------------------------- 3168 // Definition of frame structure and management information. 
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by    +--------+
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX); // Inline Cache Register
  interpreter_method_oop_reg(EBX); // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  // The lo[]/hi[] tables are indexed by ideal register type and give the
  // low and high halves of the returned register pair.
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // Same tables as c_return_value.  Note the thresholds differ: here float
  // results are placed in XMM0 from UseSSE>=1, doubles from UseSSE>=2.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0); // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100); // Required cost attribute
ins_attrib ins_size(8); // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2)
                             // specifies the alignment that some part of the instruction (not
                             // necessarily the start) requires.  If > 1, a compute_padding()
                             // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate in the signed 8-bit range [-128, 127]
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate in the range [-32768, 32767]
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value is unchanged by a round trip through (int),
// i.e. it fits in a signed 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (UseSSE <= 1)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (UseSSE >= 2)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  // NOTE(review): the predicate tests jlong_cast(value) == 0 rather than
  // value == 0.0; presumably this matches +0.0 only, like immF0 - confirm.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (UseSSE == 0)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (UseSSE >= 1)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants 16 and 24
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long Register
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// predicate(false): this operand is never selected by matching a RegFlags node
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands
// Double (RegD) variants, enabled when UseSSE < 2
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// (the offset is a pointer constant; the base is an integer register)
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// Each variant below takes its base register from eRegP_no_EBP and carries
// op_cost(100).

// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
// (restricted by the predicate to lt, ge, le and gt tests)
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
// (restricted by the predicate to ne and eq tests)
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
// (less/greater and less_equal/greater_equal carry each other's cmpOp codes)
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares
// (less/greater and less_equal/greater_equal carry each other's cmpOpU codes)
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation using big decoder.
// Note that src is declared with the 'memory' operand type here, although no
// MEM resource is reserved.
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation using big decoder
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory with no register source operand
// (the class name suggests an immediate source -- confirm against users).
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation whose result is the flags register (cr)
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation whose result is the flags register (cr)
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation whose result is the flags register (cr)
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Four-instruction idiom that only reads its three register operands
// (presumably a compare-less-than sequence, going by the class name).
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;        // any mem
%}

// Conditional move reg-reg long
// NOTE(review): declared single_instruction although DECODE is given a count
// of 2 -- confirm which of the two is intended.
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg operation (single operand, only read)
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float operation with one destination and three register sources
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any decoder; count of 4 matches instruction_count
    FPU    : S3(2);
%}

// Float operation with one destination, one memory source and two register
// sources
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-reg operation
pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-mem operation
pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float mem-mem operation (no FPU resource is reserved)
pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

// Float mem-mem-mem operation
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

// Float mem-reg operation that also involves a constant (per the class name)
pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant combined with a register source
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR   : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr    : S1(read);
    BR    : S3;
%}

// Allocation idiom
// NOTE(review): instruction_count is 1 while DECODE is given a count of 3 --
// confirm this is intended.
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow(  ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
// Reverse the bytes of an int in place (matches ReverseBytesI) with one BSWAP.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse the bytes of a long in place (matches ReverseBytesL).  Per the
// format string: byte-swap each 32-bit half, then exchange the two halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Reverse the bytes of an unsigned short in place (matches ReverseBytesUS).
// The 32-bit BSWAP is followed by a logical shift right by 16.
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Reverse the bytes of a signed short in place (matches ReverseBytesS).
// Same as the unsigned variant, but the shift right by 16 is arithmetic.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

// Count leading zeros of an int with a single LZCNT; selected when
// UseCountLeadingZerosInstruction is set.
instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros of an int with BSR; selected when
// UseCountLeadingZerosInstruction is not set.  The result is computed as
// (BitsPerInt - 1) - dst.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
      "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    // Fall-through (presumably the zero-source case -- confirm against the
    // ISA reference): force -1 so that NEG + ADD below produce 32.
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros of a long with LZCNT; selected when
// UseCountLeadingZerosInstruction is set.  The high word is counted first;
// the low word is counted, and BitsPerInt added, only when the carry-clear
// jump is not taken.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    // Relies on the carry flag left by LZCNT (presumably set when the high
    // word is zero -- confirm against the ISA reference).
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros of a long with BSR; selected when
// UseCountLeadingZerosInstruction is not set.  dst is brought to a bit index
// in the 64-bit value (or -1), and the result is (BitsPerLong - 1) - dst.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
      "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
      "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    // Bit found in the high word: rebase its index into the 64-bit value.
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    // Neither word produced a bit: force -1 so the result below becomes 64.
    __ movl(Rdst, -1);
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros of an int with a single TZCNT; selected when
// UseCountTrailingZerosInstruction is set.
instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros of an int with BSF; selected when
// UseCountTrailingZerosInstruction is not set.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    // Fall-through (presumably the zero-source case): the answer is 32.
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros of a long with TZCNT; selected when
// UseCountTrailingZerosInstruction is set.  The low word is counted first;
// the high word is counted, and BitsPerInt added, only when the carry-clear
// jump is not taken.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    // Relies on the carry flag left by TZCNT (presumably set when the low
    // word is zero -- confirm against the ISA reference).
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros of a long with BSF; selected when
// UseCountTrailingZerosInstruction is not set.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
      "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    // Neither word produced a bit: 32 here plus the 32 added below gives 64.
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

// Population count of an int register; selected when UsePopCountInstruction
// is set.
instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Population count of an int loaded from memory: the LoadI is folded into
// the POPCNT memory operand.
// NOTE(review): uses the register-only ialu_reg pipe class although the
// source is a memory operand -- confirm whether ialu_reg_mem was intended.
instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Population count of a long register pair: count each 32-bit half
// separately and add the two counts.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Population count of a long loaded from memory: the two 32-bit words at
// $mem and $mem+4 are counted directly from memory and the counts added.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    // Disabled earlier form, kept for reference; the two addresses are built
    // explicitly below instead.
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
// The high word is produced by copying the sign-extended low word and
// shifting it right arithmetically.
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
// The high word is cleared with XOR, hence the KILL of the flags register.
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 bits of the mask are applied; the value comes from a
    // zero-extending byte load.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// The (x << 24) >> 24 pattern on the loaded short is replaced by a single
// sign-extending byte load.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
// The high word is produced by copying the sign-extended low word and
// shifting it right arithmetically.
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// The (x << 24) >> 24 pattern on the loaded value is replaced by a single
// sign-extending byte load.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// The AND with 0xFF is folded into a zero-extending byte load.
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 bits of the mask are applied; the value comes from a
    // zero-extending 16-bit load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
// The (x << 24) >> 24 pattern on the loaded int is replaced by a single
// sign-extending byte load.
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// The AND with 0xFF is folded into a zero-extending byte load.
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
// The (x << 16) >> 16 pattern on the loaded int is replaced by a single
// sign-extending 16-bit load.
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// The AND with 0xFFFF is folded into a zero-extending 16-bit load.
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
// The high word is a copy of the low word shifted right arithmetically by
// 31, i.e. filled with the sign bit.
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
// The AND with 0xFF is folded into a zero-extending byte load.
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
// The AND with 0xFFFF is folded into a zero-extending 16-bit load.
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
// The high word is simply cleared rather than sign-extended from the low word.
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
// Matches an int load converted to long and ANDed with the immL_32bits mask:
// the low word is loaded as-is and the high word is cleared.
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
// Only used when the load does not require atomic access; the two 32-bit
// halves are loaded with separate MOVs from $mem and $mem+4.
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
// Selected only when UseSSE<=1 and the LoadL node requires atomic access.
// The result is produced in a stack slot, not in a register pair.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic long load for UseSSE>=2, stack-slot destination: one 64-bit MOVSD
// into the XMM temp, then one 64-bit MOVSD from the temp to [rsp + $dst disp].
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic long load for UseSSE>=2, register-pair destination: one 64-bit MOVSD
// into the XMM temp, then the two halves are extracted with MOVD, shifting
// the temp right by 32 bits in between.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
// Matches LoadRange; encoded as opcode 0x8B (MOV reg,mem).
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
// Same encoding as loadRange, with a pointer register destination.
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
// Same encoding as loadP; only the matched ideal node (LoadKlass) differs.
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double
// FPU-stack variant, used when UseSSE<=1: FLD the 64-bit value (DD /0) and pop
// it into $dst.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
// Variant for UseSSE>=2 with UseXmmLoadAndClearUpper set.
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Double to XMM, variant for UseSSE>=2 with UseXmmLoadAndClearUpper clear.
// The encode block is the same movdbl call as in loadD; only the predicate and
// the printed mnemonic (MOVLPD) differ in this definition.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float
// FPU-stack variant, used only when UseSSE==0: FLD the 32-bit value (D9 /0)
// and pop it into $dst.
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
// The five leaP* definitions below are identical except for the addressing-mode
// operand they accept; each materializes the address with LEA (opcode 0x8D).
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// LEA for the indOffset32 addressing operand.
instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// LEA for the indIndexOffset addressing operand.
instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// LEA for the indIndexScale addressing operand.
instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// LEA for the indIndexScaleOffset addressing operand.
instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
// General int immediate; no ins_cost is given here, whereas the zero-constant
// variant below declares ins_cost(50).
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// Uses XOR reg,reg (opcode 0x33) instead of a MOV immediate, so the flags
// register is declared KILLed.
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

// Load pointer constant via the LdImmP encoding class.
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant: the low and high halves are loaded by two separate
// encoding classes (LdImmL_Lo, LdImmL_Hi).
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load long constant zero: XOR each half with itself (cheaper ins_cost than
// loadConL).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
// Loads a float constant from the constant table onto the FPU stack and pops
// it into $dst.
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
// Uses FLDZ, so no constant-table entry is referenced.
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
// Uses FLD1, so no constant-table entry is referenced.
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// Loads a float constant into an XMM register with MOVSS from the constant
// table.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// Zeroes the XMM register with XORPS instead of loading from the constant
// table (lower ins_cost than loadConF).
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
// Loads a double constant from the constant table onto the FPU stack and pops
// it into $dst.
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
// Uses FLDZ, so no constant-table entry is referenced.
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
// Uses FLD1, so no constant-table entry is referenced.
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// Loads a double constant into an XMM register with MOVSD from the constant
// table.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// Zeroes the XMM register with XORPD instead of loading from the constant
// table (lower ins_cost than loadConD).
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
// Int stack slot -> int register, MOV reg,mem (0x8B).
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot: long stack slot -> register pair, two 0x8B moves
// (RegMem for the first, RegMem_Hi for the second).
// NOTE(review): the format text prints "$dst" / "$dst+4" and "$src.lo" /
// "$src.hi", which looks swapped for a register-pair destination and a memory
// source (compare storeSSL) - confirm before relying on the printed form.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
// Pointer stack slot -> pointer register, MOV reg,mem (0x8B).
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
// Float stack slot -> FPU register: FLD m32real, then pop into $dst.
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
// Double stack slot -> FPU register: FLD m64real, then pop into $dst.
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// Which variant matches PrefetchAllocation is decided by the predicates on
// UseSSE and AllocatePrefetchInstr below.

// UseSSE==0 and AllocatePrefetchInstr!=3: nothing is emitted (size 0).
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// AllocatePrefetchInstr==3: PREFETCHW.  Note there is no UseSSE term in this
// predicate.
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// UseSSE>=1 and AllocatePrefetchInstr==0: PREFETCHNTA.
instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// UseSSE>=1 and AllocatePrefetchInstr==1: PREFETCHT0.
instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// UseSSE>=1 and AllocatePrefetchInstr==2: PREFETCHT2.
instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
// Source is restricted to the xRegI register class; opcode 0x88 (MOV r/m8,r8).
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// The secondary opcode 0x66 is emitted first (OpcS), then the primary 0x89
// (OpcP), i.e. a prefixed 0x89 store.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Only selected when the StoreL node does not require atomic access: the two
// halves are written by two separate 0x89 moves.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// Matches StoreI(mem, ConvL2I src) and stores just $src$$Register (printed as
// $src.lo), so no separate conversion instruction is needed.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
// This variant is selected when UseSSE<=1; the probe is a CMP (0x3B) of EAX
// against $mem, hence the KILL of the flags register.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// Atomic long store for UseSSE>=2 from a stack slot: probe $mem with CMP, load
// the 64-bit value from [rsp + $src disp] into the XMM temp, then store it with
// a single MOVSD.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic long store for UseSSE>=2 from a register pair: each half is moved
// into its own XMM temp with MOVD, the two are combined with PUNPCKLDQ, and
// the result is stored with a single MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
// C7 /0 followed by a 32-bit constant; costed higher (150) than the register
// store storeI (125).
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
// Only available when UseStoreImmI16 is set; emits SizePrefix before C7 /0 and
// a 16-bit constant.
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
// Encoded as C7 /0 followed by a 32-bit constant.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
// Same encoding as storeImmB; only the matched ideal node (StoreCM) differs.
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double
// FPU-stack variant (UseSSE<=1); the source is restricted to regDPR1.
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
// Matches StoreD(mem, RoundDouble src) with the same encoding as storeDPR, so
// no separate rounding instruction is emitted.
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Move Double: register-to-register copy from a regD operand to a vlRegD
// operand (no memory access, despite the "load" wording in the format text).
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move Double: register-to-register copy from a vlRegD operand to a regD
// operand (the reverse direction of MoveD2VL).
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Move Float: register-to-register copy from a regF operand to a vlRegF
// operand (no memory access, despite the "load" wording in the format text).
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move Float: register-to-register copy from a vlRegF operand to a regF
// operand (the reverse direction of MoveF2VL).
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store Float
// FPU-stack variant (UseSSE==0); the source is restricted to regFPR1.
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// Matches StoreF(mem, RoundFloat src) with the same encoding as storeFPR.
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// Matches StoreF(mem, ConvD2F src) with a double FPU source: the 32-bit store
// (D9 /2) stands in for the conversion.  Predicate is UseSSE<=1, unlike the
// two float-source variants above (UseSSE==0).
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
// The float is written as its 32-bit pattern with an integer C7 /0 store.
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
// The float is written as its 32-bit pattern with an integer C7 /0 store.
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
// Two 0x89 moves: RegMem for the first half, RegMem_Hi for the second.
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors
// Every flavor except membar_volatile has an empty encoding and size(0).

// Matches both MemBarAcquire and LoadFence; emits nothing.
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// Matches MemBarAcquireLock; emits nothing (see the format text for why).
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Matches both MemBarRelease and StoreFence; emits nothing.
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Matches MemBarReleaseLock; emits nothing (see the format text for why).
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// The only barrier here that emits code: a StoreLoad membar through the macro
// assembler.  The format text shows it as a LOCK ADDL on [ESP]; flags are
// declared KILLed.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Zero-cost alternative for MemBarVolatile, chosen when
// Matcher::post_store_load_barrier(n) holds; emits nothing.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Matches MemBarStoreStore; emits nothing.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// Int -> pointer cast.  Source and destination are both pinned to the EAX
// operand classes and the encoding is empty, so no instruction is emitted.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

// Pointer -> int cast, implemented through the enc_Copy encoding class.
instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// Fallback used when VM_Version::supports_cmov() is false: a short branch on
// the inverted condition jumps over a plain MOV.
// NOTE(review): the format text prints "J$cop" while the encoding branches on
// the inverted condition (cmpcode^1); other emulated cmoves in this file print
// "Jn$cop" - confirm which form is intended.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned-compare counterpart of jmovI_reg (cmpOpU / eFlagsRegU operands);
// the encoding is the same branch-over-MOV sequence.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Int conditional move, register source, signed compare.  Requires CMOV
// support; encoded as 0x0F 0x40 through enc_cmov(cop).
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Int conditional move, register source, unsigned compare.
instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Variant for the cmpOpUCF / eFlagsRegUCF operands; has no encoding of its own
// and simply expands to cmovI_regU.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
// Memory-source form: folds the LoadI into the CMOV (cost 250 vs. 200 for the
// register form).
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
// Memory-source form for unsigned compares.
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Variant for the cmpOpUCF / eFlagsRegUCF operands; expands to cmovI_memU.
instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
// Pointer conditional move, register source, signed compare; requires CMOV
// support.
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
// There is no predicate on this definition; it carries a higher ins_cost (300)
// than cmovP_reg (200).
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
// Pointer conditional move, register source, unsigned compare; requires CMOV
// support.
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Variant for the cmpOpUCF / eFlagsRegUCF operands; has no encoding of its own
// and simply expands to cmovP_regU.
instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
// predicate(VM_Version::supports_cmov() );
// match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
// ins_cost(250);
// format %{ "CMOV$cop $dst,$src\t# ptr" %}
// opcode(0x0F,0x40);
// ins_encode( enc_cmov(cop), RegMem( dst, src ) );
// ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
// predicate(VM_Version::supports_cmov() );
// match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
// ins_cost(250);
// format %{ "CMOV$cop $dst,$src\t# ptr" %}
// opcode(0x0F,0x40);
// ins_encode( enc_cmov(cop), RegMem( dst, src ) );
// ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
// Double conditional move on the FPU stack (UseSSE<=1) for the cmpOp_fcmov
// condition operand with unsigned flags; the destination is restricted to
// regDPR1.
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
// Float counterpart of fcmovDPR_regU (UseSSE==0).  It reuses the same
// enc_cmov_dpr encoding class and pipe_cmovDPR_reg pipeline class as the
// double version.
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// So the signed case is emulated: enc_cmov_branch is followed by a push of
// $src and an FPU op (DD /3) on $dst; the format text shows this as an
// inverted jump over the move.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
// Float conditional move on XMM registers: branch over a MOVSS when the
// inverted condition holds.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
// Double conditional move on XMM registers: branch over a MOVSD when the
// inverted condition holds.  The listing is tagged "# double" because the
// encoding emits movdbl on regD operands.
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// cmpOpUCF / eFlagsRegUCF flavour; expands into fcmovF_regU.
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
// Listing is tagged "# double": the encoding emits movdbl on regD operands.
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// cmpOpUCF / eFlagsRegUCF flavour; expands into fcmovD_regU.
instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move (signed-compare flags): one CMOV per 32-bit half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Long conditional move (unsigned-compare flags): one CMOV per 32-bit half.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// cmpOpUCF / eFlagsRegUCF flavour; expands into cmovL_regU.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
// Register += register.
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Register += immediate.
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Register += 1, only when UseIncDec is set.
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand integer add via LEA; declares no flags effect.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Three-operand pointer add via LEA; declares no flags effect.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Register += -1, only when UseIncDec is set.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Pointer += integer register.
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer += immediate.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Register += int loaded from memory.
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Memory += register (load/add/store folded into one instruction).
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Memory += 1.
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Memory += -1.
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// Cast nodes below have an empty encoding: they emit no machine code.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}


// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high order word of the new value to store but
    //       our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// The compareAndSwap* rules below each match both the strong and the weak
// ideal node and, per their format strings, turn the resulting flags into a
// 0/1 value in $res.

// 64-bit compare-and-swap; needs CMPXCHG8 support (see predicate).
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer compare-and-swap.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte compare-and-swap.
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Short compare-and-swap.
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int compare-and-swap.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// The compareAndExchange* rules below define $oldval as their result
// (Set oldval ...) instead of producing a boolean.

// 64-bit compare-and-exchange; needs CMPXCHG8 support (see predicate).
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer compare-and-exchange.
instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte compare-and-exchange.
instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Short compare-and-exchange.
instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Int compare-and-exchange.
instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte get-and-add whose result is unused (see predicate): a locked ADD of
// the constant suffices.
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
// Byte get-and-add: locked XADD leaves the previous memory value in $newval.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Short get-and-add whose result is unused (see predicate): a locked 16-bit
// ADD of the constant suffices.  The listing mnemonic is ADDW to match the
// addw that is emitted.
instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDW [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Short get-and-add.  The listing mnemonic is XADDW to match the xaddw that
// is emitted.
instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDW [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Int get-and-add whose result is unused (see predicate): a locked ADD of
// the constant suffices.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Int get-and-add.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Short get-and-set.
instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Int get-and-set.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Pointer get-and-set (emitted as a 32-bit XCHG).
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
// Register -= register.
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Register -= immediate.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05);  /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Register -= int loaded from memory.
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Memory -= register (load/sub/store folded into one instruction).
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29);  /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
// Matches pointer + (0 - src) and emits a single SUB.
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Integer negate: matches 0 - dst.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03);  // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Loads only the low word of a long constant (operand for the
// mulI_imm_*high rules below).
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
//   (special case for shift by 32)
// The predicate restricts the multiplier to a long constant within jint range.
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// The predicate restricts the multiplier to a long constant within jint range.
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// dst = dst*src1 + src2*src3, expanded into two multiplies and an add.
// src2 is overwritten by the second multiply, hence KILL src2.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
           mulI_eReg(src2, src3, cr);
           addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned int-to-long multiply of two masked ints.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
// Basic idea: lo(result) = lo(src * EAX)
//             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// The listing shows the min_jint / -1 special case being tested before the
// CDQ + IDIV pair.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
    "normal: CDQ\n\t"
            "IDIV $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// Per the listing, arguments are pushed and SharedRuntime::ldiv is called.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
    "normal: CDQ\n\t"
            "IDIV $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
// Uses the same size(26) and cdq_enc-based encoding as divI_eReg, so the
// listing shows the same sequence (including the min_jint / -1 guard) rather
// than only the CDQ + IDIV pair.
// NOTE(review): cdq_enc is defined elsewhere in this file; confirm the listing
// against its body.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
    "normal: CDQ\n\t"
            "IDIV $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// Per the listing, arguments are pushed and SharedRuntime::lrem is called.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
// Long division of EDX:EAX by a 32-bit constant using unsigned 32-bit DIVs.
// The assert below rejects divisors 0, -1 and min_jint.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    // Divide by the magnitude; the sign of the constant is applied at the end.
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    // MOV (not XOR) is used so the flags from the CMP above survive.
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
// convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 holds the high 32 bits of the result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    // Quotient was computed for abs(con); negate it for a negative constant.
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
// Long remainder of EDX:EAX by a 32-bit constant using unsigned 32-bit DIVs.
// The assert below rejects divisors 0, -1 and min_jint.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ?
con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    // MOV (not XOR) is used so the flags from the CMP above survive.
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    // Move the 32-bit remainder into the low half and sign-extend it into
    // the high half.
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one (register destination)
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one (read-modify-write on memory)
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate (register destination)
// This rule has no memory operand, so it uses the register pipeline class,
// matching the SHL and SHR imm8 register forms (salI_eReg_imm,
// shrI_eReg_imm).  It previously named ialu_mem_imm.
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic Shift Right by 8-bit immediate (read-modify-write on memory)
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
// The pair of shifts is replaced by a single sign-extending byte move.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    // One sign-extending 16-bit move stands in for the matched shift pair.
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst
(StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// All rules below are guarded by predicate(UseBMI1Instructions).

// (src1 ^ -1) & src2  ==>  ANDN
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Same as above with the second operand loaded from memory.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// (0 - src) & src  ==>  BLSI
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// (src + -1) ^ src  ==>  BLSMSK
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// Same as above with the operand loaded from memory.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// (src + -1) & src  ==>  BLSR
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// Same as above with the operand loaded from memory.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst
(OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer register viewed as an integer (CastP2X)
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// The three helpers below carry no match rule; they are only reached through
// the expand blocks of the matching rules that follow.
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
// (dst << 1) | (dst >>> -1)
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// The predicate accepts the pattern only when the two shift counts sum to a
// multiple of 32.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
// (dst << shift) | (dst >>> (0 - shift))
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
// (dst << shift) | (dst >>> (32 - shift))
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI
dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
// The three helpers below carry no match rule; they are only reached through
// the expand blocks of the matching rules that follow.
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
// (dst >>> 1) | (dst << -1)
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// The predicate accepts the pattern only when the two shift counts sum to a
// multiple of 32.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
// (dst >>> shift) | (dst << (0 - shift))
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
// (dst >>> shift) | (dst << (32 - shift))
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32,
eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// Emitted as NOT; this rule declares no flags operand and no KILL effect.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI
dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Helper without a match rule; used by the convI2B expansion below.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Helper without a match rule; used by the convI2B expansion below.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B of an int: expands to a copy followed by the NEG/ADC pair.
instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Helper without a match rule; used by the convP2B expansion below.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Helper without a match rule; used by the convP2B expansion below.
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B of a pointer: expands to a copy followed by the NEG/ADC pair.
instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// CmpLTMask: dst is zeroed, SETlt writes 0/1 from the signed compare of p and
// q, and NEG turns that into 0 / -1.  The sequence is branch-free, so no
// label is declared.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: an arithmetic shift right by 31 in place.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// p = (p - q) + ((p < q) ? y : 0), done with a SUB and a conditional ADD.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// y = (p < q) ? y : 0, done with a compare and a conditional clear.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// Each rule below defines only the flags register (Set cr ...).

// ADD is performed in place, so op1 is marked USE_KILL.
instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Immediate form of the rule above.
instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtraction check uses CMP, so neither operand is modified.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Immediate form of the rule above.
instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 0 - op2: NEG is performed in place, so op2 is marked USE_KILL.
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Immediate multiply check: the product goes to a TEMP register, so op1 is
// only read.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
// ADD on the low halves, ADC on the high halves.
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
// The high half is read from $mem+4, as the format shows.
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  // SUB on the low halves, SBB on the high halves.
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
// The high half is read from $mem+4, as the format shows.
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long Register in place (matches 0 - dst)
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL
cr); 9047 format %{ "AND $dst.lo,$src.lo\n\t" 9048 "AND $dst.hi,$src.hi" %} 9049 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 9050 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9051 ins_pipe( ialu_reg_long ); 9052 %} 9053 9054 // And Long Register with Memory 9055 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9056 match(Set dst (AndL dst (LoadL mem))); 9057 effect(KILL cr); 9058 ins_cost(125); 9059 format %{ "AND $dst.lo,$mem\n\t" 9060 "AND $dst.hi,$mem+4" %} 9061 opcode(0x23, 0x23); 9062 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9063 ins_pipe( ialu_reg_long_mem ); 9064 %} 9065 9066 // BMI1 instructions 9067 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 9068 match(Set dst (AndL (XorL src1 minus_1) src2)); 9069 predicate(UseBMI1Instructions); 9070 effect(KILL cr, TEMP dst); 9071 9072 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 9073 "ANDNL $dst.hi, $src1.hi, $src2.hi" 9074 %} 9075 9076 ins_encode %{ 9077 Register Rdst = $dst$$Register; 9078 Register Rsrc1 = $src1$$Register; 9079 Register Rsrc2 = $src2$$Register; 9080 __ andnl(Rdst, Rsrc1, Rsrc2); 9081 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 9082 %} 9083 ins_pipe(ialu_reg_reg_long); 9084 %} 9085 9086 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 9087 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 9088 predicate(UseBMI1Instructions); 9089 effect(KILL cr, TEMP dst); 9090 9091 ins_cost(125); 9092 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 9093 "ANDNL $dst.hi, $src1.hi, $src2+4" 9094 %} 9095 9096 ins_encode %{ 9097 Register Rdst = $dst$$Register; 9098 Register Rsrc1 = $src1$$Register; 9099 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 9100 9101 __ andnl(Rdst, Rsrc1, $src2$$Address); 9102 __ andnl(HIGH_FROM_LOW(Rdst), 
HIGH_FROM_LOW(Rsrc1), src2_hi); 9103 %} 9104 ins_pipe(ialu_reg_mem); 9105 %} 9106 9107 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 9108 match(Set dst (AndL (SubL imm_zero src) src)); 9109 predicate(UseBMI1Instructions); 9110 effect(KILL cr, TEMP dst); 9111 9112 format %{ "MOVL $dst.hi, 0\n\t" 9113 "BLSIL $dst.lo, $src.lo\n\t" 9114 "JNZ done\n\t" 9115 "BLSIL $dst.hi, $src.hi\n" 9116 "done:" 9117 %} 9118 9119 ins_encode %{ 9120 Label done; 9121 Register Rdst = $dst$$Register; 9122 Register Rsrc = $src$$Register; 9123 __ movl(HIGH_FROM_LOW(Rdst), 0); 9124 __ blsil(Rdst, Rsrc); 9125 __ jccb(Assembler::notZero, done); 9126 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9127 __ bind(done); 9128 %} 9129 ins_pipe(ialu_reg); 9130 %} 9131 9132 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 9133 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 9134 predicate(UseBMI1Instructions); 9135 effect(KILL cr, TEMP dst); 9136 9137 ins_cost(125); 9138 format %{ "MOVL $dst.hi, 0\n\t" 9139 "BLSIL $dst.lo, $src\n\t" 9140 "JNZ done\n\t" 9141 "BLSIL $dst.hi, $src+4\n" 9142 "done:" 9143 %} 9144 9145 ins_encode %{ 9146 Label done; 9147 Register Rdst = $dst$$Register; 9148 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9149 9150 __ movl(HIGH_FROM_LOW(Rdst), 0); 9151 __ blsil(Rdst, $src$$Address); 9152 __ jccb(Assembler::notZero, done); 9153 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 9154 __ bind(done); 9155 %} 9156 ins_pipe(ialu_reg_mem); 9157 %} 9158 9159 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9160 %{ 9161 match(Set dst (XorL (AddL src minus_1) src)); 9162 predicate(UseBMI1Instructions); 9163 effect(KILL cr, TEMP dst); 9164 9165 format %{ "MOVL $dst.hi, 0\n\t" 9166 "BLSMSKL $dst.lo, $src.lo\n\t" 9167 "JNC done\n\t" 9168 "BLSMSKL $dst.hi, $src.hi\n" 9169 "done:" 9170 %} 9171 9172 ins_encode %{ 9173 Label done; 
9174 Register Rdst = $dst$$Register; 9175 Register Rsrc = $src$$Register; 9176 __ movl(HIGH_FROM_LOW(Rdst), 0); 9177 __ blsmskl(Rdst, Rsrc); 9178 __ jccb(Assembler::carryClear, done); 9179 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9180 __ bind(done); 9181 %} 9182 9183 ins_pipe(ialu_reg); 9184 %} 9185 9186 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9187 %{ 9188 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 9189 predicate(UseBMI1Instructions); 9190 effect(KILL cr, TEMP dst); 9191 9192 ins_cost(125); 9193 format %{ "MOVL $dst.hi, 0\n\t" 9194 "BLSMSKL $dst.lo, $src\n\t" 9195 "JNC done\n\t" 9196 "BLSMSKL $dst.hi, $src+4\n" 9197 "done:" 9198 %} 9199 9200 ins_encode %{ 9201 Label done; 9202 Register Rdst = $dst$$Register; 9203 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9204 9205 __ movl(HIGH_FROM_LOW(Rdst), 0); 9206 __ blsmskl(Rdst, $src$$Address); 9207 __ jccb(Assembler::carryClear, done); 9208 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 9209 __ bind(done); 9210 %} 9211 9212 ins_pipe(ialu_reg_mem); 9213 %} 9214 9215 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9216 %{ 9217 match(Set dst (AndL (AddL src minus_1) src) ); 9218 predicate(UseBMI1Instructions); 9219 effect(KILL cr, TEMP dst); 9220 9221 format %{ "MOVL $dst.hi, $src.hi\n\t" 9222 "BLSRL $dst.lo, $src.lo\n\t" 9223 "JNC done\n\t" 9224 "BLSRL $dst.hi, $src.hi\n" 9225 "done:" 9226 %} 9227 9228 ins_encode %{ 9229 Label done; 9230 Register Rdst = $dst$$Register; 9231 Register Rsrc = $src$$Register; 9232 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9233 __ blsrl(Rdst, Rsrc); 9234 __ jccb(Assembler::carryClear, done); 9235 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9236 __ bind(done); 9237 %} 9238 9239 ins_pipe(ialu_reg); 9240 %} 9241 9242 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9243 %{ 9244 match(Set dst (AndL 
(AddL (LoadL src) minus_1) (LoadL src) )); 9245 predicate(UseBMI1Instructions); 9246 effect(KILL cr, TEMP dst); 9247 9248 ins_cost(125); 9249 format %{ "MOVL $dst.hi, $src+4\n\t" 9250 "BLSRL $dst.lo, $src\n\t" 9251 "JNC done\n\t" 9252 "BLSRL $dst.hi, $src+4\n" 9253 "done:" 9254 %} 9255 9256 ins_encode %{ 9257 Label done; 9258 Register Rdst = $dst$$Register; 9259 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9260 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9261 __ blsrl(Rdst, $src$$Address); 9262 __ jccb(Assembler::carryClear, done); 9263 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9264 __ bind(done); 9265 %} 9266 9267 ins_pipe(ialu_reg_mem); 9268 %} 9269 9270 // Or Long Register with Register 9271 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9272 match(Set dst (OrL dst src)); 9273 effect(KILL cr); 9274 format %{ "OR $dst.lo,$src.lo\n\t" 9275 "OR $dst.hi,$src.hi" %} 9276 opcode(0x0B,0x0B); 9277 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9278 ins_pipe( ialu_reg_reg_long ); 9279 %} 9280 9281 // Or Long Register with Immediate 9282 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9283 match(Set dst (OrL dst src)); 9284 effect(KILL cr); 9285 format %{ "OR $dst.lo,$src.lo\n\t" 9286 "OR $dst.hi,$src.hi" %} 9287 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9288 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9289 ins_pipe( ialu_reg_long ); 9290 %} 9291 9292 // Or Long Register with Memory 9293 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9294 match(Set dst (OrL dst (LoadL mem))); 9295 effect(KILL cr); 9296 ins_cost(125); 9297 format %{ "OR $dst.lo,$mem\n\t" 9298 "OR $dst.hi,$mem+4" %} 9299 opcode(0x0B,0x0B); 9300 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9301 ins_pipe( ialu_reg_long_mem ); 9302 %} 9303 9304 // Xor Long Register with Register 9305 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9306 
match(Set dst (XorL dst src)); 9307 effect(KILL cr); 9308 format %{ "XOR $dst.lo,$src.lo\n\t" 9309 "XOR $dst.hi,$src.hi" %} 9310 opcode(0x33,0x33); 9311 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9312 ins_pipe( ialu_reg_reg_long ); 9313 %} 9314 9315 // Xor Long Register with Immediate -1 9316 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9317 match(Set dst (XorL dst imm)); 9318 format %{ "NOT $dst.lo\n\t" 9319 "NOT $dst.hi" %} 9320 ins_encode %{ 9321 __ notl($dst$$Register); 9322 __ notl(HIGH_FROM_LOW($dst$$Register)); 9323 %} 9324 ins_pipe( ialu_reg_long ); 9325 %} 9326 9327 // Xor Long Register with Immediate 9328 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9329 match(Set dst (XorL dst src)); 9330 effect(KILL cr); 9331 format %{ "XOR $dst.lo,$src.lo\n\t" 9332 "XOR $dst.hi,$src.hi" %} 9333 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9334 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9335 ins_pipe( ialu_reg_long ); 9336 %} 9337 9338 // Xor Long Register with Memory 9339 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9340 match(Set dst (XorL dst (LoadL mem))); 9341 effect(KILL cr); 9342 ins_cost(125); 9343 format %{ "XOR $dst.lo,$mem\n\t" 9344 "XOR $dst.hi,$mem+4" %} 9345 opcode(0x33,0x33); 9346 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9347 ins_pipe( ialu_reg_long_mem ); 9348 %} 9349 9350 // Shift Left Long by 1 9351 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9352 predicate(UseNewLongLShift); 9353 match(Set dst (LShiftL dst cnt)); 9354 effect(KILL cr); 9355 ins_cost(100); 9356 format %{ "ADD $dst.lo,$dst.lo\n\t" 9357 "ADC $dst.hi,$dst.hi" %} 9358 ins_encode %{ 9359 __ addl($dst$$Register,$dst$$Register); 9360 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9361 %} 9362 ins_pipe( ialu_reg_long ); 9363 %} 9364 9365 // Shift Left Long by 2 9366 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9367 
predicate(UseNewLongLShift); 9368 match(Set dst (LShiftL dst cnt)); 9369 effect(KILL cr); 9370 ins_cost(100); 9371 format %{ "ADD $dst.lo,$dst.lo\n\t" 9372 "ADC $dst.hi,$dst.hi\n\t" 9373 "ADD $dst.lo,$dst.lo\n\t" 9374 "ADC $dst.hi,$dst.hi" %} 9375 ins_encode %{ 9376 __ addl($dst$$Register,$dst$$Register); 9377 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9378 __ addl($dst$$Register,$dst$$Register); 9379 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9380 %} 9381 ins_pipe( ialu_reg_long ); 9382 %} 9383 9384 // Shift Left Long by 3 9385 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9386 predicate(UseNewLongLShift); 9387 match(Set dst (LShiftL dst cnt)); 9388 effect(KILL cr); 9389 ins_cost(100); 9390 format %{ "ADD $dst.lo,$dst.lo\n\t" 9391 "ADC $dst.hi,$dst.hi\n\t" 9392 "ADD $dst.lo,$dst.lo\n\t" 9393 "ADC $dst.hi,$dst.hi\n\t" 9394 "ADD $dst.lo,$dst.lo\n\t" 9395 "ADC $dst.hi,$dst.hi" %} 9396 ins_encode %{ 9397 __ addl($dst$$Register,$dst$$Register); 9398 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9399 __ addl($dst$$Register,$dst$$Register); 9400 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9401 __ addl($dst$$Register,$dst$$Register); 9402 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9403 %} 9404 ins_pipe( ialu_reg_long ); 9405 %} 9406 9407 // Shift Left Long by 1-31 9408 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9409 match(Set dst (LShiftL dst cnt)); 9410 effect(KILL cr); 9411 ins_cost(200); 9412 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9413 "SHL $dst.lo,$cnt" %} 9414 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9415 ins_encode( move_long_small_shift(dst,cnt) ); 9416 ins_pipe( ialu_reg_long ); 9417 %} 9418 9419 // Shift Left Long by 32-63 9420 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9421 match(Set dst (LShiftL dst cnt)); 9422 effect(KILL cr); 9423 ins_cost(300); 9424 
format %{ "MOV $dst.hi,$dst.lo\n" 9425 "\tSHL $dst.hi,$cnt-32\n" 9426 "\tXOR $dst.lo,$dst.lo" %} 9427 opcode(0xC1, 0x4); /* C1 /4 ib */ 9428 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9429 ins_pipe( ialu_reg_long ); 9430 %} 9431 9432 // Shift Left Long by variable 9433 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9434 match(Set dst (LShiftL dst shift)); 9435 effect(KILL cr); 9436 ins_cost(500+200); 9437 size(17); 9438 format %{ "TEST $shift,32\n\t" 9439 "JEQ,s small\n\t" 9440 "MOV $dst.hi,$dst.lo\n\t" 9441 "XOR $dst.lo,$dst.lo\n" 9442 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9443 "SHL $dst.lo,$shift" %} 9444 ins_encode( shift_left_long( dst, shift ) ); 9445 ins_pipe( pipe_slow ); 9446 %} 9447 9448 // Shift Right Long by 1-31 9449 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9450 match(Set dst (URShiftL dst cnt)); 9451 effect(KILL cr); 9452 ins_cost(200); 9453 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9454 "SHR $dst.hi,$cnt" %} 9455 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9456 ins_encode( move_long_small_shift(dst,cnt) ); 9457 ins_pipe( ialu_reg_long ); 9458 %} 9459 9460 // Shift Right Long by 32-63 9461 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9462 match(Set dst (URShiftL dst cnt)); 9463 effect(KILL cr); 9464 ins_cost(300); 9465 format %{ "MOV $dst.lo,$dst.hi\n" 9466 "\tSHR $dst.lo,$cnt-32\n" 9467 "\tXOR $dst.hi,$dst.hi" %} 9468 opcode(0xC1, 0x5); /* C1 /5 ib */ 9469 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9470 ins_pipe( ialu_reg_long ); 9471 %} 9472 9473 // Shift Right Long by variable 9474 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9475 match(Set dst (URShiftL dst shift)); 9476 effect(KILL cr); 9477 ins_cost(600); 9478 size(17); 9479 format %{ "TEST $shift,32\n\t" 9480 "JEQ,s small\n\t" 9481 "MOV $dst.lo,$dst.hi\n\t" 9482 "XOR $dst.hi,$dst.hi\n" 9483 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9484 "SHR $dst.hi,$shift" %} 9485 ins_encode( 
shift_right_long( dst, shift ) ); 9486 ins_pipe( pipe_slow ); 9487 %} 9488 9489 // Shift Right Long by 1-31 9490 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9491 match(Set dst (RShiftL dst cnt)); 9492 effect(KILL cr); 9493 ins_cost(200); 9494 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9495 "SAR $dst.hi,$cnt" %} 9496 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9497 ins_encode( move_long_small_shift(dst,cnt) ); 9498 ins_pipe( ialu_reg_long ); 9499 %} 9500 9501 // Shift Right Long by 32-63 9502 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9503 match(Set dst (RShiftL dst cnt)); 9504 effect(KILL cr); 9505 ins_cost(300); 9506 format %{ "MOV $dst.lo,$dst.hi\n" 9507 "\tSAR $dst.lo,$cnt-32\n" 9508 "\tSAR $dst.hi,31" %} 9509 opcode(0xC1, 0x7); /* C1 /7 ib */ 9510 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9511 ins_pipe( ialu_reg_long ); 9512 %} 9513 9514 // Shift Right arithmetic Long by variable 9515 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9516 match(Set dst (RShiftL dst shift)); 9517 effect(KILL cr); 9518 ins_cost(600); 9519 size(18); 9520 format %{ "TEST $shift,32\n\t" 9521 "JEQ,s small\n\t" 9522 "MOV $dst.lo,$dst.hi\n\t" 9523 "SAR $dst.hi,31\n" 9524 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9525 "SAR $dst.hi,$shift" %} 9526 ins_encode( shift_right_arith_long( dst, shift ) ); 9527 ins_pipe( pipe_slow ); 9528 %} 9529 9530 9531 //----------Double Instructions------------------------------------------------ 9532 // Double Math 9533 9534 // Compare & branch 9535 9536 // P6 version of float compare, sets condition codes in EFLAGS 9537 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9538 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9539 match(Set cr (CmpD src1 src2)); 9540 effect(KILL rax); 9541 ins_cost(150); 9542 format %{ "FLD $src1\n\t" 9543 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9544 "JNP exit\n\t" 9545 "MOV ah,1 // saw a NaN, set CF\n\t" 9546 
"SAHF\n" 9547 "exit:\tNOP // avoid branch to branch" %} 9548 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9549 ins_encode( Push_Reg_DPR(src1), 9550 OpcP, RegOpc(src2), 9551 cmpF_P6_fixup ); 9552 ins_pipe( pipe_slow ); 9553 %} 9554 9555 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9556 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9557 match(Set cr (CmpD src1 src2)); 9558 ins_cost(150); 9559 format %{ "FLD $src1\n\t" 9560 "FUCOMIP ST,$src2 // P6 instruction" %} 9561 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9562 ins_encode( Push_Reg_DPR(src1), 9563 OpcP, RegOpc(src2)); 9564 ins_pipe( pipe_slow ); 9565 %} 9566 9567 // Compare & branch 9568 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9569 predicate(UseSSE<=1); 9570 match(Set cr (CmpD src1 src2)); 9571 effect(KILL rax); 9572 ins_cost(200); 9573 format %{ "FLD $src1\n\t" 9574 "FCOMp $src2\n\t" 9575 "FNSTSW AX\n\t" 9576 "TEST AX,0x400\n\t" 9577 "JZ,s flags\n\t" 9578 "MOV AH,1\t# unordered treat as LT\n" 9579 "flags:\tSAHF" %} 9580 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9581 ins_encode( Push_Reg_DPR(src1), 9582 OpcP, RegOpc(src2), 9583 fpu_flags); 9584 ins_pipe( pipe_slow ); 9585 %} 9586 9587 // Compare vs zero into -1,0,1 9588 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9589 predicate(UseSSE<=1); 9590 match(Set dst (CmpD3 src1 zero)); 9591 effect(KILL cr, KILL rax); 9592 ins_cost(280); 9593 format %{ "FTSTD $dst,$src1" %} 9594 opcode(0xE4, 0xD9); 9595 ins_encode( Push_Reg_DPR(src1), 9596 OpcS, OpcP, PopFPU, 9597 CmpF_Result(dst)); 9598 ins_pipe( pipe_slow ); 9599 %} 9600 9601 // Compare into -1,0,1 9602 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9603 predicate(UseSSE<=1); 9604 match(Set dst (CmpD3 src1 src2)); 9605 effect(KILL cr, KILL rax); 9606 ins_cost(300); 9607 format %{ "FCMPD $dst,$src1,$src2" %} 9608 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9609 ins_encode( 
Push_Reg_DPR(src1), 9610 OpcP, RegOpc(src2), 9611 CmpF_Result(dst)); 9612 ins_pipe( pipe_slow ); 9613 %} 9614 9615 // float compare and set condition codes in EFLAGS by XMM regs 9616 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9617 predicate(UseSSE>=2); 9618 match(Set cr (CmpD src1 src2)); 9619 ins_cost(145); 9620 format %{ "UCOMISD $src1,$src2\n\t" 9621 "JNP,s exit\n\t" 9622 "PUSHF\t# saw NaN, set CF\n\t" 9623 "AND [rsp], #0xffffff2b\n\t" 9624 "POPF\n" 9625 "exit:" %} 9626 ins_encode %{ 9627 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9628 emit_cmpfp_fixup(_masm); 9629 %} 9630 ins_pipe( pipe_slow ); 9631 %} 9632 9633 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9634 predicate(UseSSE>=2); 9635 match(Set cr (CmpD src1 src2)); 9636 ins_cost(100); 9637 format %{ "UCOMISD $src1,$src2" %} 9638 ins_encode %{ 9639 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9640 %} 9641 ins_pipe( pipe_slow ); 9642 %} 9643 9644 // float compare and set condition codes in EFLAGS by XMM regs 9645 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9646 predicate(UseSSE>=2); 9647 match(Set cr (CmpD src1 (LoadD src2))); 9648 ins_cost(145); 9649 format %{ "UCOMISD $src1,$src2\n\t" 9650 "JNP,s exit\n\t" 9651 "PUSHF\t# saw NaN, set CF\n\t" 9652 "AND [rsp], #0xffffff2b\n\t" 9653 "POPF\n" 9654 "exit:" %} 9655 ins_encode %{ 9656 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9657 emit_cmpfp_fixup(_masm); 9658 %} 9659 ins_pipe( pipe_slow ); 9660 %} 9661 9662 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9663 predicate(UseSSE>=2); 9664 match(Set cr (CmpD src1 (LoadD src2))); 9665 ins_cost(100); 9666 format %{ "UCOMISD $src1,$src2" %} 9667 ins_encode %{ 9668 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9669 %} 9670 ins_pipe( pipe_slow ); 9671 %} 9672 9673 // Compare into -1,0,1 in XMM 9674 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9675 predicate(UseSSE>=2); 9676 match(Set dst (CmpD3 src1 src2)); 
9677 effect(KILL cr); 9678 ins_cost(255); 9679 format %{ "UCOMISD $src1, $src2\n\t" 9680 "MOV $dst, #-1\n\t" 9681 "JP,s done\n\t" 9682 "JB,s done\n\t" 9683 "SETNE $dst\n\t" 9684 "MOVZB $dst, $dst\n" 9685 "done:" %} 9686 ins_encode %{ 9687 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9688 emit_cmpfp3(_masm, $dst$$Register); 9689 %} 9690 ins_pipe( pipe_slow ); 9691 %} 9692 9693 // Compare into -1,0,1 in XMM and memory 9694 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9695 predicate(UseSSE>=2); 9696 match(Set dst (CmpD3 src1 (LoadD src2))); 9697 effect(KILL cr); 9698 ins_cost(275); 9699 format %{ "UCOMISD $src1, $src2\n\t" 9700 "MOV $dst, #-1\n\t" 9701 "JP,s done\n\t" 9702 "JB,s done\n\t" 9703 "SETNE $dst\n\t" 9704 "MOVZB $dst, $dst\n" 9705 "done:" %} 9706 ins_encode %{ 9707 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9708 emit_cmpfp3(_masm, $dst$$Register); 9709 %} 9710 ins_pipe( pipe_slow ); 9711 %} 9712 9713 9714 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9715 predicate (UseSSE <=1); 9716 match(Set dst (SubD dst src)); 9717 9718 format %{ "FLD $src\n\t" 9719 "DSUBp $dst,ST" %} 9720 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9721 ins_cost(150); 9722 ins_encode( Push_Reg_DPR(src), 9723 OpcP, RegOpc(dst) ); 9724 ins_pipe( fpu_reg_reg ); 9725 %} 9726 9727 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9728 predicate (UseSSE <=1); 9729 match(Set dst (RoundDouble (SubD src1 src2))); 9730 ins_cost(250); 9731 9732 format %{ "FLD $src2\n\t" 9733 "DSUB ST,$src1\n\t" 9734 "FSTP_D $dst\t# D-round" %} 9735 opcode(0xD8, 0x5); 9736 ins_encode( Push_Reg_DPR(src2), 9737 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9738 ins_pipe( fpu_mem_reg_reg ); 9739 %} 9740 9741 9742 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9743 predicate (UseSSE <=1); 9744 match(Set dst (SubD dst (LoadD src))); 9745 ins_cost(150); 9746 9747 format %{ "FLD $src\n\t" 9748 "DSUBp $dst,ST" %} 9749 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9750 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9751 OpcP, RegOpc(dst) ); 9752 ins_pipe( fpu_reg_mem ); 9753 %} 9754 9755 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9756 predicate (UseSSE<=1); 9757 match(Set dst (AbsD src)); 9758 ins_cost(100); 9759 format %{ "FABS" %} 9760 opcode(0xE1, 0xD9); 9761 ins_encode( OpcS, OpcP ); 9762 ins_pipe( fpu_reg_reg ); 9763 %} 9764 9765 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9766 predicate(UseSSE<=1); 9767 match(Set dst (NegD src)); 9768 ins_cost(100); 9769 format %{ "FCHS" %} 9770 opcode(0xE0, 0xD9); 9771 ins_encode( OpcS, OpcP ); 9772 ins_pipe( fpu_reg_reg ); 9773 %} 9774 9775 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9776 predicate(UseSSE<=1); 9777 match(Set dst (AddD dst src)); 9778 format %{ "FLD $src\n\t" 9779 "DADD $dst,ST" %} 9780 size(4); 9781 ins_cost(150); 9782 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9783 ins_encode( Push_Reg_DPR(src), 9784 OpcP, RegOpc(dst) ); 9785 ins_pipe( fpu_reg_reg ); 9786 %} 9787 9788 9789 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9790 predicate(UseSSE<=1); 9791 match(Set dst (RoundDouble (AddD src1 src2))); 9792 ins_cost(250); 9793 9794 format %{ "FLD $src2\n\t" 9795 "DADD ST,$src1\n\t" 9796 "FSTP_D $dst\t# D-round" %} 9797 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9798 ins_encode( Push_Reg_DPR(src2), 9799 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9800 ins_pipe( fpu_mem_reg_reg ); 9801 %} 9802 9803 9804 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9805 predicate(UseSSE<=1); 9806 match(Set dst (AddD dst (LoadD src))); 9807 ins_cost(150); 9808 9809 format %{ "FLD $src\n\t" 9810 "DADDp $dst,ST" %} 9811 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9812 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9813 OpcP, RegOpc(dst) ); 9814 ins_pipe( fpu_reg_mem ); 9815 %} 9816 9817 // add-to-memory 9818 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9819 predicate(UseSSE<=1); 9820 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9821 ins_cost(150); 9822 9823 format %{ "FLD_D $dst\n\t" 9824 "DADD ST,$src\n\t" 9825 "FST_D $dst" %} 9826 opcode(0xDD, 0x0); 9827 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9828 Opcode(0xD8), RegOpc(src), 9829 set_instruction_start, 9830 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9831 ins_pipe( fpu_reg_mem ); 9832 %} 9833 9834 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9835 predicate(UseSSE<=1); 9836 match(Set dst (AddD dst con)); 9837 ins_cost(125); 9838 format %{ "FLD1\n\t" 9839 "DADDp $dst,ST" %} 9840 ins_encode %{ 9841 __ fld1(); 9842 __ faddp($dst$$reg); 9843 %} 9844 ins_pipe(fpu_reg); 9845 %} 9846 9847 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9848 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9849 match(Set dst (AddD dst con)); 9850 ins_cost(200); 9851 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9852 "DADDp $dst,ST" %} 9853 ins_encode %{ 9854 __ fld_d($constantaddress($con)); 9855 __ faddp($dst$$reg); 9856 %} 9857 ins_pipe(fpu_reg_mem); 9858 %} 9859 9860 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9861 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9862 match(Set dst (RoundDouble (AddD src con))); 9863 ins_cost(200); 9864 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9865 "DADD ST,$src\n\t" 9866 "FSTP_D $dst\t# D-round" %} 9867 ins_encode %{ 9868 __ fld_d($constantaddress($con)); 9869 __ fadd($src$$reg); 9870 __ fstp_d(Address(rsp, $dst$$disp)); 9871 %} 9872 ins_pipe(fpu_mem_reg_con); 9873 %} 9874 9875 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9876 predicate(UseSSE<=1); 9877 match(Set dst (MulD dst src)); 9878 format %{ "FLD $src\n\t" 9879 "DMULp $dst,ST" %} 9880 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9881 ins_cost(150); 9882 ins_encode( Push_Reg_DPR(src), 9883 OpcP, RegOpc(dst) ); 9884 ins_pipe( 
fpu_reg_reg ); 9885 %} 9886 9887 // Strict FP instruction biases argument before multiply then 9888 // biases result to avoid double rounding of subnormals. 9889 // 9890 // scale arg1 by multiplying arg1 by 2^(-15360) 9891 // load arg2 9892 // multiply scaled arg1 by arg2 9893 // rescale product by 2^(15360) 9894 // 9895 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9896 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9897 match(Set dst (MulD dst src)); 9898 ins_cost(1); // Select this instruction for all strict FP double multiplies 9899 9900 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9901 "DMULp $dst,ST\n\t" 9902 "FLD $src\n\t" 9903 "DMULp $dst,ST\n\t" 9904 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9905 "DMULp $dst,ST\n\t" %} 9906 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9907 ins_encode( strictfp_bias1(dst), 9908 Push_Reg_DPR(src), 9909 OpcP, RegOpc(dst), 9910 strictfp_bias2(dst) ); 9911 ins_pipe( fpu_reg_reg ); 9912 %} 9913 9914 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9915 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9916 match(Set dst (MulD dst con)); 9917 ins_cost(200); 9918 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9919 "DMULp $dst,ST" %} 9920 ins_encode %{ 9921 __ fld_d($constantaddress($con)); 9922 __ fmulp($dst$$reg); 9923 %} 9924 ins_pipe(fpu_reg_mem); 9925 %} 9926 9927 9928 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9929 predicate( UseSSE<=1 ); 9930 match(Set dst (MulD dst (LoadD src))); 9931 ins_cost(200); 9932 format %{ "FLD_D $src\n\t" 9933 "DMULp $dst,ST" %} 9934 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9935 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9936 OpcP, RegOpc(dst) ); 9937 ins_pipe( fpu_reg_mem ); 9938 %} 9939 9940 // 9941 // Cisc-alternate to reg-reg multiply 9942 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9943 predicate( UseSSE<=1 ); 9944 match(Set dst (MulD src (LoadD mem))); 9945 ins_cost(250); 9946 format %{ "FLD_D $mem\n\t" 9947 "DMUL ST,$src\n\t" 9948 "FSTP_D $dst" %} 9949 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9950 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9951 OpcReg_FPR(src), 9952 Pop_Reg_DPR(dst) ); 9953 ins_pipe( fpu_reg_reg_mem ); 9954 %} 9955 9956 9957 // MACRO3 -- addDPR a mulDPR 9958 // This instruction is a '2-address' instruction in that the result goes 9959 // back to src2. This eliminates a move from the macro; possibly the 9960 // register allocator will have to add it back (and maybe not). 9961 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9962 predicate( UseSSE<=1 ); 9963 match(Set src2 (AddD (MulD src0 src1) src2)); 9964 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9965 "DMUL ST,$src1\n\t" 9966 "DADDp $src2,ST" %} 9967 ins_cost(250); 9968 opcode(0xDD); /* LoadD DD /0 */ 9969 ins_encode( Push_Reg_FPR(src0), 9970 FMul_ST_reg(src1), 9971 FAddP_reg_ST(src2) ); 9972 ins_pipe( fpu_reg_reg_reg ); 9973 %} 9974 9975 9976 // MACRO3 -- subDPR a mulDPR 9977 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9978 predicate( UseSSE<=1 ); 9979 match(Set src2 (SubD (MulD src0 src1) src2)); 9980 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9981 "DMUL ST,$src1\n\t" 9982 "DSUBRp $src2,ST" %} 9983 ins_cost(250); 9984 ins_encode( Push_Reg_FPR(src0), 9985 FMul_ST_reg(src1), 9986 Opcode(0xDE), Opc_plus(0xE0,src2)); 9987 ins_pipe( fpu_reg_reg_reg ); 9988 %} 9989 9990 9991 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9992 predicate( UseSSE<=1 ); 9993 match(Set dst (DivD dst src)); 9994 9995 format %{ "FLD $src\n\t" 9996 "FDIVp $dst,ST" %} 9997 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9998 ins_cost(150); 9999 ins_encode( Push_Reg_DPR(src), 10000 OpcP, RegOpc(dst) ); 10001 ins_pipe( fpu_reg_reg ); 10002 %} 10003 10004 // Strict FP instruction biases argument before division then 10005 // 
biases result, to avoid double rounding of subnormals. 10006 // 10007 // scale dividend by multiplying dividend by 2^(-15360) 10008 // load divisor 10009 // divide scaled dividend by divisor 10010 // rescale quotient by 2^(15360) 10011 // 10012 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 10013 predicate (UseSSE<=1); 10014 match(Set dst (DivD dst src)); 10015 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 10016 ins_cost(01); 10017 10018 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 10019 "DMULp $dst,ST\n\t" 10020 "FLD $src\n\t" 10021 "FDIVp $dst,ST\n\t" 10022 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 10023 "DMULp $dst,ST\n\t" %} 10024 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10025 ins_encode( strictfp_bias1(dst), 10026 Push_Reg_DPR(src), 10027 OpcP, RegOpc(dst), 10028 strictfp_bias2(dst) ); 10029 ins_pipe( fpu_reg_reg ); 10030 %} 10031 10032 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 10033 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 10034 match(Set dst (RoundDouble (DivD src1 src2))); 10035 10036 format %{ "FLD $src1\n\t" 10037 "FDIV ST,$src2\n\t" 10038 "FSTP_D $dst\t# D-round" %} 10039 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 10040 ins_encode( Push_Reg_DPR(src1), 10041 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); 10042 ins_pipe( fpu_mem_reg_reg ); 10043 %} 10044 10045 10046 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 10047 predicate(UseSSE<=1); 10048 match(Set dst (ModD dst src)); 10049 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10050 10051 format %{ "DMOD $dst,$src" %} 10052 ins_cost(250); 10053 ins_encode(Push_Reg_Mod_DPR(dst, src), 10054 emitModDPR(), 10055 Push_Result_Mod_DPR(src), 10056 Pop_Reg_DPR(dst)); 10057 ins_pipe( pipe_slow ); 10058 %} 10059 10060 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 10061 
predicate(UseSSE>=2); 10062 match(Set dst (ModD src0 src1)); 10063 effect(KILL rax, KILL cr); 10064 10065 format %{ "SUB ESP,8\t # DMOD\n" 10066 "\tMOVSD [ESP+0],$src1\n" 10067 "\tFLD_D [ESP+0]\n" 10068 "\tMOVSD [ESP+0],$src0\n" 10069 "\tFLD_D [ESP+0]\n" 10070 "loop:\tFPREM\n" 10071 "\tFWAIT\n" 10072 "\tFNSTSW AX\n" 10073 "\tSAHF\n" 10074 "\tJP loop\n" 10075 "\tFSTP_D [ESP+0]\n" 10076 "\tMOVSD $dst,[ESP+0]\n" 10077 "\tADD ESP,8\n" 10078 "\tFSTP ST0\t # Restore FPU Stack" 10079 %} 10080 ins_cost(250); 10081 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 10082 ins_pipe( pipe_slow ); 10083 %} 10084 10085 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 10086 predicate (UseSSE<=1); 10087 match(Set dst(AtanD dst src)); 10088 format %{ "DATA $dst,$src" %} 10089 opcode(0xD9, 0xF3); 10090 ins_encode( Push_Reg_DPR(src), 10091 OpcP, OpcS, RegOpc(dst) ); 10092 ins_pipe( pipe_slow ); 10093 %} 10094 10095 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 10096 predicate (UseSSE>=2); 10097 match(Set dst(AtanD dst src)); 10098 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 10099 format %{ "DATA $dst,$src" %} 10100 opcode(0xD9, 0xF3); 10101 ins_encode( Push_SrcD(src), 10102 OpcP, OpcS, Push_ResultD(dst) ); 10103 ins_pipe( pipe_slow ); 10104 %} 10105 10106 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 10107 predicate (UseSSE<=1); 10108 match(Set dst (SqrtD src)); 10109 format %{ "DSQRT $dst,$src" %} 10110 opcode(0xFA, 0xD9); 10111 ins_encode( Push_Reg_DPR(src), 10112 OpcS, OpcP, Pop_Reg_DPR(dst) ); 10113 ins_pipe( pipe_slow ); 10114 %} 10115 10116 //-------------Float Instructions------------------------------- 10117 // Float Math 10118 10119 // Code for float compare: 10120 // fcompp(); 10121 // fwait(); fnstsw_ax(); 10122 // sahf(); 10123 // movl(dst, unordered_result); 10124 // jcc(Assembler::parity, exit); 10125 // movl(dst, less_result); 10126 // jcc(Assembler::below, exit); 10127 // movl(dst, equal_result); 10128 
//   jcc(Assembler::equal, exit);
//   movl(dst, greater_result);
// exit:

// P6 version of float compare, sets condition codes in EFLAGS.
// Selected only when the CPU supports CMOV and floats live on the x87
// stack (UseSSE == 0). Per the format text, an unordered result (parity
// set) goes through a "MOV ah,1 / SAHF" fixup, which is why EAX is killed.
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same compare as cmpFPR_cc_P6, but producing eFlagsRegUCF: no NaN fixup
// encoding is appended and no EAX kill is declared, hence the lower cost.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
// Generic x87 path (no CMOV requirement). Per the format text the FPU
// status word travels through AX into EFLAGS (FNSTSW AX ... SAHF), so EAX
// is killed; unordered is treated as "less than".
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
// The opcode bytes are emitted secondary-then-primary (OpcS, OpcP),
// i.e. D9 E4, followed by a pop of the pushed operand.
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
// Three-way x87 float compare of two registers; EAX and EFLAGS are killed.
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// The NaN fixup shown in the format text is presumably what
// emit_cmpfp_fixup() emits (helper defined elsewhere in the file).
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// XMM register compare producing eFlagsRegUCF: bare UCOMISS, no fixup.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Memory form: the second operand is a LoadF folded into the compare.
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Memory form producing eFlagsRegUCF: bare UCOMISS, no fixup.
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
// UCOMISS followed by emit_cmpfp3(), which materializes the three-way
// result in $dst (sequence as listed in the format text); EFLAGS is killed.
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
// Same as cmpF_reg with the second operand loaded from memory.
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
// 24-bit variant of float subtract: the result is popped to a float stack
// slot (stackSlotF dst) instead of staying in an x87 register.
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Two-address form: dst = dst - src.
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
// 24-bit variant of float add; the sum is popped to a float stack slot.
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Two-address form: dst = dst + src.
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Float absolute value. Both operands are constrained to regFPR1 and the
// encoding is just the two opcode bytes (secondary then primary: D9 E1).
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Float negate. Both operands are constrained to regFPR1 and the encoding
// is just the two opcode bytes (secondary then primary: D9 E0).
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
// The memory operand is loaded first (tertiary opcode D9 /0), then the
// register operand is added and the sum is popped to a float stack slot.
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
// Two-address form with a folded load: dst = dst + [src].
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
// NOTE(review): src1 is a memory operand matched directly (the match rule
// has no LoadF) -- confirm against the operand definitions.
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
// Both inputs come from memory: src2 is loaded, then src1 is added with the
// memory form of the primary opcode (reg field = secondary).
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
// Memory + memory add; same encoding sequence as addFPR24_mem_cisc.
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
// Add a float constant taken from the constant table; the sum is stored to
// the float stack slot at [rsp + $dst$$disp].
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
// Add a float constant taken from the constant table; the sum is popped
// into the destination x87 register.
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
// 24-bit variant of float multiply; the product is popped to a float stack
// slot.
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Register-to-register float multiply. The format text lists the operands
// in the order the encoding actually uses them (push src2, multiply by
// src1) and shows a register pop (FSTP), matching Pop_Reg_FPR(dst); the
// memory-store spelling FSTP_S is used only by the stackSlotF variants.
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
// The memory operand is loaded first, multiplied by the register operand,
// and the product is popped to a float stack slot.
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
// Same sequence as mulFPR24_reg_mem, but the product is popped into an x87
// register.
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
// Memory * memory multiply: src2 is loaded, src1 is multiplied in with the
// memory form of the primary opcode, and the product goes to a stack slot.
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
// Multiply by a float constant from the constant table; the product is
// stored to the float stack slot at [rsp + $dst$$disp].
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
// Multiply by a float constant from the constant table; the product is
// popped into the destination x87 register.
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
// Matches dst = ([mem1] * src1) + src2 as a single instruction; ins_cost(95)
// is set explicitly for this combined form.
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
// Combined multiply-then-add on the x87 stack, two-address form:
// src2 = (src0 * src1) + src2. Selected only when UseSSE==0 and the
// compilation is not selecting 24-bit instructions, so no rounding store
// is emitted. The encoding classes are listed in emission order and mirror
// the format text: push src0, multiply by src1, add-and-pop into src2.
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
// Matches dst = (src2 - src1) / src3 as one instruction: src2 is pushed,
// subFPR_divFPR_encode() applies src1 and src3 (enc_class defined elsewhere
// in the file), and the quotient is popped into dst.
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
// 24-bit variant of float divide: src1 is pushed, divided by src2, and the
// quotient is popped to a float stack slot (stackSlotF dst).
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Two-address form: dst = dst / src. src is pushed and the primary opcode
// is emitted with dst as its register operand.
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
// Float remainder on the x87 stack, 24-bit variant: the result is popped to
// a float stack slot. EAX and EFLAGS are declared killed because the shared
// emitModDPR() encoding uses them.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
// Two-address float remainder: dst = dst % src, result popped into the
// destination x87 register.
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Float remainder for XMM operands (UseSSE>=1). Per the format text both
// inputs are bounced through a 4-byte stack temporary onto the x87 stack,
// the FPREM loop runs there (via the shared emitModDPR() encoding), and the
// result is moved back to $dst through the same temporary before the x87
// stack is restored.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
          %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted. Please keep it that way!
// Round an x87 float to single precision by storing it to a float stack
// slot (x87-only floats: UseSSE==0).
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round an x87 double to double precision by storing it to a double stack
// slot (doubles live on the x87 stack when UseSSE<=1).
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 8-bit exponent
// Double-to-float on the x87 stack; expands to the single-precision store
// performed by roundFloat_mem_reg.
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 8-bit exponent
// Double (x87) to float (XMM) for UseSSE==1: the value is stored as a
// single through a 4-byte stack temporary and reloaded with MOVSS.
// EFLAGS is killed because the stack pointer is adjusted with SUB/ADD.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      // src is not the register encoded as FPR1L_enc: push a copy
      // (FLD ST(i-1)) and store-with-pop so the x87 stack depth is unchanged.
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      // src is the FPR1L_enc register: store without popping.
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
// Pure XMM path (UseSSE>=2): a single CVTSD2SS.
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Float-to-double between x87 registers (UseSSE==0).
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// Float-to-double with the result in a double stack slot; expands to
// roundDouble_mem_reg.
// NOTE(review): src is declared regFPR here but is forwarded to
// roundDouble_mem_reg, whose source operand is regDPR -- confirm the expand
// is intended as written.
instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// Float (XMM) to double (x87) for UseSSE==1: the float is spilled to a
// 4-byte stack temporary with MOVSS, loaded onto the x87 stack, and popped
// into the destination register. EFLAGS is killed because the stack
// pointer is adjusted with SUB/ADD.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

// Float-to-double, pure XMM path (UseSSE>=2): a single CVTSS2SD.
instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// x87 double-to-int (UseSSE<=1). The result is fixed in EAX; EDX is
// declared as a killed temp and EFLAGS is killed. Per the format text the
// fast path stores with truncation and only calls d2i_wrapper when the
// stored value is 0x80000000.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// XMM path (UseSSE>=2): CVTTSD2SI handles the common case; if it yields
// 0x80000000 the source is reloaded onto the x87 stack through an 8-byte
// stack temporary and d2i_wrapper is called.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 is the value that sends us to the slow path.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: hand the original double to the wrapper on the x87 stack.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 double-to-long (UseSSE<=1). The result is fixed in the EDX:EAX pair
// (eADXRegL). Per the format text the slow path (d2l_wrapper) is taken only
// when the stored value is EDX==0x80000000 and EAX==0.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
// The double is spilled to an 8-byte stack temporary, loaded onto the x87
// stack, and stored back as a 64-bit integer with the truncating control
// word; the two halves are then popped into EAX and EDX.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    // (the two pops release the 8 bytes reserved above)
    __ pop(rax);
    __ pop(rdx);
    // Only the bit pattern 0x80000000:00000000 takes the slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: hand the original double to the wrapper on the x87 stack.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a float to an int. Java semantics require special handling of
// the corner cases, so we set the rounding mode to 'zero' (truncate),
// store the value down as an int, and then restore the normal control
// word. The hardware stores a flag value (0x80000000) if the conversion
// would overflow or the input was a NaN; we check for this and take the
// slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
// XMM float-to-int (UseSSE>=1). The result is fixed in EAX; EDX is declared
// as a killed temp and EFLAGS is killed. CVTTSS2SI handles the common case;
// if it yields 0x80000000 the float is reloaded onto the x87 stack through
// a 4-byte stack temporary and d2i_wrapper is called.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 is the value that sends us to the slow path.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: hand the original float to the wrapper on the x87 stack.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 float-to-long (UseSSE==0). The result is fixed in the EDX:EAX pair
// (eADXRegL). Per the format text the slow path (d2l_wrapper) is taken only
// when the stored value is EDX==0x80000000 and EAX==0.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
// XMM float-to-long (UseSSE>=1), result fixed in EDX:EAX (eADXRegL).
// The float is spilled into an 8-byte stack temporary (only the low 4 bytes
// are written by MOVSS), loaded onto the x87 stack, and stored back as a
// 64-bit integer with the truncating control word; the two halves are then
// popped into EAX and EDX.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    // (the two pops release the 8 bytes reserved above)
    __ pop(rax);
    __ pop(rdx);
    // Only the bit pattern 0x80000000:00000000 takes the slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: hand the original float to the wrapper on the x87 stack.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Int-to-double on the x87 stack (UseSSE<=1). The int operand is an integer
// stack slot: it is loaded as an integer (FILD) and popped into dst.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Int-to-double from a general register into XMM (UseSSE>=2), used when
// the UseXmmI2D flag is off: a single CVTSI2SD.
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Int-to-double with the int loaded straight from memory (UseSSE>=2).
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Alternative int-to-double selected when UseXmmI2D is on: the int is moved
// into the XMM register with MOVD and converted in place with CVTDQ2PD.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Int-to-double on the x87 stack with the int loaded straight from memory.
// Only selected when the compilation is not using 24-bit instructions.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// Int -> float on x87 for inputs of the form (x & 255): the predicate
// requires the ConvI2F input to be an AndI with the constant 255.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
// Selected for UseSSE==1, or for UseSSE>=2 when UseXmmI2F is off.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Alternative int -> float selected by UseXmmI2F: MOVD into the XMM register,
// then convert in place with CVTDQ2PS.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extending int -> long: copy the int into both halves, then arithmetic
// shift the high half right by 31 (per the format string).  Kills flags.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
// The format string used to end with a dangling "\n\t" separator; it now
// ends after the last instruction, like convI2L_reg_zex above, which uses the
// same encoding.  Only the debug listing text is affected.
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long -> double through x87 (UseSSE<=1); the result is stored to a double
// stack slot, which the format string labels as the rounding step.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Long -> double with the result in XMM (UseSSE>=2): the long is pushed,
// loaded and re-stored by the x87 unit, then fetched with MOVSD.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Long -> float with the result in XMM (UseSSE>=1); same scheme as
// convL2D_reg but stored as a single and fetched with MOVSS.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Long -> float through x87 with the result stored to a float stack slot.
// NOTE(review): unlike its siblings this rule carries no UseSSE predicate --
// confirm that is intended.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long -> int: copy the low half of the register pair.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// ---- Raw bit moves between float and int (MoveF2I / MoveI2F) ----
// Each variant below covers one combination of source/destination location;
// ins_cost orders them when several apply.

// Float stack slot -> int register: plain 32-bit load from [ESP+disp].
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// x87 register -> int stack slot (UseSSE==0).
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// XMM register -> int stack slot via MOVSS (UseSSE>=1).
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// XMM register -> int register via MOVD (UseSSE>=2); lowest cost variant.
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Int register -> float stack slot: plain 32-bit store to [ESP+disp].
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// Int stack slot -> x87 register (UseSSE==0): FLD m32real then FSTP.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Int stack slot -> XMM register via MOVSS (UseSSE>=1).
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Int register -> XMM register via MOVD (UseSSE>=2).
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ---- Raw bit moves between double and long (MoveD2L / MoveL2D) ----

// Double stack slot -> long register pair: two 32-bit loads (lo, then hi
// from $src+4).
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// x87 double register -> long stack slot (UseSSE<=1).
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// XMM double -> long stack slot via MOVSD (UseSSE>=2).
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// XMM double -> long register pair without touching memory (UseSSE>=2).
// MOVD extracts the low 32 bits; PSHUFLW with 0x4E writes a shuffled copy of
// the source into tmp so that a second MOVD can fetch the high half.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Long register pair -> double stack slot: two 32-bit stores (lo, then hi
// at $dst+4).
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// Long stack slot -> x87 double register (UseSSE<=1): FLD m64real then FSTP.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// Long stack slot -> XMM double when UseXmmLoadAndClearUpper is set; the
// format string names the load MOVSD.
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Counterpart for !UseXmmLoadAndClearUpper.  The encoding is the same
// movdbl() call; only the predicate and the mnemonic in the format string
// (MOVLPD) differ.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Long register pair -> XMM double without touching memory (UseSSE>=2).
// Each half is moved into its own XMM register and PUNPCKLDQ interleaves
// them.  dst is declared TEMP because it is written before src.hi is read.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
// Variant for ClearArray nodes that are not flagged is_large().  The operands
// are pinned to ECX (count), EDI (base) and EAX (zero); all three and the
// flags are clobbered.  The work is delegated to clear_mem() with a final
// argument of false; the format template only describes the expected
// expansion for the debug listing.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV RDI,RAX\n\t"
       $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD 0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB 0x8,RCX\n\t"
       $$emit$$"JGE L_loop\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JL L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD 0x20,RAX\n\t"
       $$emit$$"SUB 0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JLE L_end\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ XMM0,(RAX)\n\t"
       $$emit$$"ADD 0x8,RAX\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"JGE L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

// Variant for ClearArray nodes flagged is_large(): same operands and
// clobbers, but clear_mem() is called with a final argument of true and the
// format template omits the short-array loop.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
       $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD 0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB 0x8,RCX\n\t"
       $$emit$$"JGE L_loop\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JL L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD 0x20,RAX\n\t"
       $$emit$$"SUB 0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD 0x4,RCX\n\t"
       $$emit$$"JLE L_end\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ XMM0,(RAX)\n\t"
       $$emit$$"ADD 0x8,RAX\n\t"
       $$emit$$"DEC RCX\n\t"
       $$emit$$"JGE L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true);
  %}
  ins_pipe( pipe_slow );
%}

// ---- String compare intrinsics ----
// One rule per StrCompNode encoding (LL, UU, LU, UL).  All inputs are pinned
// to fixed registers and destroyed (USE_KILL); the result is in EAX.

// StrComp, encoding LL.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp, encoding UU.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp, encoding LU.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}

// StrComp, encoding UL.  Unlike the other three rules, the register classes
// of the two strings are exchanged (str1 in ESI/EDX, str2 in EDI/ECX) and the
// operands are handed to string_compare() in swapped order (str2 first).
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
// Delegates to arrays_equals() with a first argument of false and an explicit
// count register; EBX is an extra scratch register that gets killed.
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}

  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// LL encoding; requires UseSSE42Intrinsics.  The substring length is an
// immediate, so EAX (cnt2) is only a killed scratch register here.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// UU encoding; the C8 routine is used from 8 elements upwards instead of 16.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// StrIndexOf with an immediate substring length, UL encoding; requires
// UseSSE42Intrinsics.  Lengths of 8 or more use string_indexofC8(), shorter
// ones use string_indexof().  EAX (cnt2) is only a killed scratch register.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// ---- StrIndexOf with a run-time substring length ----
// cnt2 is a real input in EAX (USE_KILL); -1 is passed where the constant
// variants pass the immediate length.

// LL encoding.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// UU encoding.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// UL encoding.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// StrIndexOfChar: search str1 (length cnt1) for the character in EAX; needs
// UseSSE42Intrinsics and three XMM temporaries.  ECX is declared TEMP here,
// whereas the substring rules above declare it KILL.
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
// AryEq, encoding LL.  arrays_equals() is called with a first argument of
// true and the final "char" argument false; ECX and EBX are scratch.
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// AryEq, encoding UU; identical to array_equalsB except that the final
// "char" argument is true.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// HasNegatives over a byte array: array in ESI and length in ECX are both
// destroyed, the result is in EAX, and EBX is killed as scratch.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                      regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
// src/dst/len are pinned to ESI/EDI/EDX and destroyed; ECX is killed as
// scratch and the result is in EAX.
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
// Produces no value (Set dummy); src/dst/len are destroyed.
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
// Register/register signed compare; the only result is the flags register.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Register/immediate signed compare.  The OpcSErm + Con8or32 encoding pair
// presumably selects the short sign-extended imm8 form when the constant
// fits -- confirm against the enc_class definitions.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of compI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against the constant zero: emitted as TEST src,src.
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & con) compared against zero: folded into TEST src,imm32.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & [mem]) compared against zero: folded into TEST src,mem.
// NOTE(review): the match rule uses the memory operand directly as the AndI
// input, whereas compI_eReg_mem above wraps its memory operand in LoadI.
// Confirm whether (AndI src (LoadI mem)) was intended.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
// Register-register CmpU; same opcode as the signed form, but the result is
// typed as eFlagsRegU.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Register-immediate CmpU.
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
// Matches a CmpU whose second input is a LoadI (register vs. memory compare).
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
// match(Set cr (CmpU (LoadI op1) op2));
//
// format %{ "CMPu $op1,$op2" %}
// ins_cost(500);
// opcode(0x39); /* Opcode 39 /r */
// ins_encode( OpcP, RegMem( op1, op2) );
//%}

// CmpU against the constant zero, emitted as TEST src,src.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
// Register-register CmpP; result is typed as eFlagsRegU.
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// CmpP of a register against a pointer constant (immP).
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
// Matches a CmpP whose second input is a LoadP; costed at 500 so that the
// predicate-guarded compP_mem_eReg rule below (no explicit ins_cost) can be
// preferred when its predicate holds -- TODO confirm the default cost.
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
// match(Set cr (CmpP (LoadP op1) op2));
//
// format %{ "CMPu $op1,$op2" %}
// ins_cost(500);
// opcode(0x39); /* Opcode 39 /r */
// ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
// The predicate restricts the rule to loads whose address type carries no
// relocation (relocInfo::none).
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// Pointer null check emitted as TEST src,src.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// Null check of a pointer loaded from memory, emitted as TEST [mem],0xFFFFFFFF.
// NOTE(review): 'zero' is declared immI0 although the rule matches a CmpP;
// the register form (testP_reg) uses immP0. Confirm whether this rule can
// ever be selected by the matcher as written.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7); /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
// effect( USE_DEF op2, USE op1, USE cr );
// format %{ "CMOVlt $op2,$op1\t! min" %}
// opcode(0x4C,0x0F);
// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
// ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
// predicate(VM_Version::supports_cmov() );
// match(Set op2 (MinI op1 op2));
// ins_cost(200);
// expand %{
// eFlagsReg cr;
// compI_eReg(cr,op1,op2);
// cmovI_reg_lt(op2,op1,cr);
// %}
//%}

// Min Register with Register (generic version)
// Two-address form: $dst is both an input and the result; the flags are
// declared killed. The instruction bytes come from the min_enc encoding class.
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
// effect( USE_DEF op2, USE op1, USE cr );
// format %{ "CMOVgt $op2,$op1\t! max" %}
// opcode(0x4F,0x0F);
// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
// ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
// predicate(VM_Version::supports_cmov() );
// match(Set op2 (MaxI op1 op2));
// ins_cost(200);
// expand %{
// eFlagsReg cr;
// compI_eReg(cr,op1,op2);
// cmovI_reg_gt(op2,op1,cr);
// %}
//%}

// Max Register with Register (generic version)
// Two-address form: $dst is both an input and the result; the flags are
// declared killed. The instruction bytes come from the max_enc encoding class.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
//
// Computes  limit = init + stride * ((limit - init + stride -/+ 1) / stride)
// using a 64-bit intermediate held in $limit_hi:$limit (EDX:EAX). $limit is
// both input and result; $limit_hi and $tmp are scratch, flags are killed.
// The stride is a compile-time constant and must not be +1 or -1 (asserted).
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      // Negative stride: add (stride + 1), then negate the 64-bit value so
      // that the division below is done with a positive divisor.
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
// Indirect jump through the constant table: the target address is read from
// [$constantaddress + switch_val * 1].
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
// Long form of the unconditional jump; declared size is 5 bytes.
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a
// relative address from Jcc+1
// Long form of the conditional branch on signed flags; declared size is 6 bytes.
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Counted-loop back branch; only used when the node has no vector mask set
// (the *_and_restoreMask rules cover the other case).
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Unsigned-flags variant of jmpLoopEnd.
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// eFlagsRegUCF variant of jmpLoopEnd; note the lower cost (200) than the
// plain unsigned rule above.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// mask version
// Jump Direct Conditional - Label defines a relative address
// from Jcc+1
// Counted-loop back branch used when the node has a vector mask set: the long
// jcc is followed by restorevectmask(); declared size is 10 bytes.
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Unsigned-flags variant of jmpLoopEnd_and_restoreMask.
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// eFlagsRegUCF variant of jmpLoopEnd_and_restoreMask (cost 300).
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// eFlagsRegUCF variant of jmpConU (cost 200).
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Two-branch conditional jump for cmpOpUCF2 conditions, which must be either
// notEqual or equal (anything else hits ShouldNotReachHere):
//   notEqual: branch to $labl on parity, then on notEqual.
//   equal:    skip over the JE on parity, otherwise branch to $labl on equal.
// No size() is declared for this rule.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
// Value-producing form: the result is delivered in $result (eDIRegP).
// opcode(0x1) asks the shared enc_PartialSubtypeCheck encoding to emit the
// final XOR that zeroes the result on a hit.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Flags-producing form: matches the subtype check compared against null and
// defines only the flags; $result (EDI) is merely killed, so opcode(0x0)
// tells the shared encoding to skip the XOR. Costed slightly below the
// value-producing form above.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
// Short (2-byte) form of jmpDir, emitted with jmpb.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short (2-byte) form of jmpCon, emitted with jccb.
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of the counted-loop back branch (signed flags).
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of the counted-loop back branch (unsigned flags).
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short form of the counted-loop back branch (eFlagsRegUCF flags).
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
// Short (2-byte) form of jmpConU.
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short (2-byte) form of jmpConUCF.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short form of jmpConUCF2: two 2-byte jccb instructions (declared size 4).
// The condition must be notEqual or equal; anything else hits
// ShouldNotReachHere.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Three-way long compare producing -1, 0 or +1 in $dst: the high halves are
// compared signed (less/greater), then the low halves unsigned (below).
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Compare against zero only needs the sign, so just the high half is TESTed.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Register-register form: CMP on the low halves, then SBB on a copy of the
// high half held in $tmp, so neither source register is modified.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
// The predicate restricts the rule to Bool tests lt and ge.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Uses the same long_cmp_flags2 encoding as the signed rule; only the flags
// operand type (flagsReg_ulong_LTGE) differs.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
// The condition operand is cmpOpU, so the branch emitted through jmpCon uses
// whatever condition code cmpOpU supplies for the lt/ge tests.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Requires CMOV support; moves the low and the high half with two CMOVs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant: the second value is a LoadL folded into the CMOVs.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  // Int conditional move (register source) driven by a long compare.
  // Requires CMOV support and an LT or GE test.
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// Int conditional move (source loaded from memory via LoadI) driven by a long
// compare. Requires CMOV support and an LT or GE test.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  // Pointer conditional move (CMoveP) driven by a long compare.
  // Requires CMOV support and an LT or GE test.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (regDPR operands, UseSSE<=1).
// The (lt || ge) test is parenthesized on purpose: '&&' binds tighter than
// '||', so without the parentheses the UseSSE guard would only apply to the
// 'lt' test and a 'ge' test would satisfy the predicate for any UseSSE value.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (regD operands, UseSSE>=2).
// The test alternatives are parenthesized so the UseSSE guard covers both.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (regFPR operands, UseSSE==0).
// The test alternatives are parenthesized so the UseSSE guard covers both.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (regF operands, UseSSE>=1).
// The test alternatives are parenthesized so the UseSSE guard covers both.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL-against-zero result in the normal flags. Only good for
// EQ/NE compares. 'tmp' is a TEMP that receives src.lo OR src.hi (see format).
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL register/register result in the normal flags. Only good for
// EQ/NE compares. Per the format text, the high words are compared only when
// the low words are equal.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test that restricts
// it to EQ and NE.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  // Unsigned long compare against zero, valid for EQ/NE only.
  // 'tmp' is a TEMP that receives src.lo OR src.hi (see format).
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL register/register result in the normal flags. Only good
// for EQ/NE compares. Per the format text, the high words are compared only
// when the low words are equal.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg.
// Only a wrapper around the normal conditional branch; the predicate
// restricts it to EQ and NE tests.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  effect(USE labl);
  expand %{
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  // Long conditional move (register source) driven by a long compare.
  // Requires CMOV support and an EQ or NE test; one CMOV per half of the long.
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src), enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}

// Long conditional move (source loaded from memory via LoadL) driven by a
// long compare. Requires CMOV support and an EQ or NE test.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  // Int conditional move (register source) driven by a long compare.
  // Requires CMOV support and an EQ or NE test.
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// Int conditional move (source loaded from memory via LoadI) driven by a long
// compare. Requires CMOV support and an EQ or NE test.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  // Pointer conditional move (CMoveP) driven by a long compare.
  // Requires CMOV support and an EQ or NE test.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (regDPR operands, UseSSE<=1).
// The (eq || ne) test is parenthesized on purpose: '&&' binds tighter than
// '||', so without the parentheses the UseSSE guard would only apply to the
// 'eq' test and an 'ne' test would satisfy the predicate for any UseSSE value.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (regD operands, UseSSE>=2).
// The test alternatives are parenthesized so the UseSSE guard covers both.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (regFPR operands, UseSSE==0).
// The test alternatives are parenthesized so the UseSSE guard covers both.
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (regF operands, UseSSE>=1).
// The test alternatives are parenthesized so the UseSSE guard covers both.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg <= zero/reg OR reg > zero/reg.
// This is just a wrapper around a normal branch, plus the predicate test
// that restricts it to GT and LE. The cmpOp_commute operand is used because
// the LEGT flags are computed with swapped operands.
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  effect(USE labl);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
// This is just a wrapper around a normal branch, plus the predicate test
// that restricts it to GT and LE. The commuted unsigned operand
// (cmpOpU_commute) is used with the LEGT flags class.
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  effect(USE labl);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs (register source).
// Requires CMOV support and an LE or GT test; one CMOV per half of the long.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src), enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}

// Compare 2 longs and CMOVE longs (source loaded from memory via LoadL).
// Requires CMOV support and an LE or GT test.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  // Int conditional move (register source) driven by a long compare.
  // Requires CMOV support and an LE or GT test.
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}

// Int conditional move (source loaded from memory via LoadI) driven by a long
// compare. Requires CMOV support and an LE or GT test.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  // Pointer conditional move (CMoveP) driven by a long compare.
  // Requires CMOV support and an LE or GT test.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (regDPR operands, UseSSE<=1).
// The (le || gt) test is parenthesized on purpose: '&&' binds tighter than
// '||', so without the parentheses the UseSSE guard would only apply to the
// 'le' test and a 'gt' test would satisfy the predicate for any UseSSE value.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (regD operands, UseSSE>=2).
// The test alternatives are parenthesized so the UseSSE guard covers both.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (regFPR operands, UseSSE==0).
// The test alternatives are parenthesized so the UseSSE guard covers both.
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


// Compare 2 longs and CMOVE floats (regF operands, UseSSE>=1).
// The test alternatives are parenthesized so the UseSSE guard covers both.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  // Matches CallDynamicJava. The encoding sequence is pre_call_resets,
  // Java_Dynamic_Call, call_epilog, post_call_FPU, in that order.
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets,
             Java_Dynamic_Call(meth),
             call_epilog,
             post_call_FPU);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // FFREEs are used to clear the entries in the float stack before the call.
  ins_encode(pre_call_resets,
             FFree_Float_Stack_All,
             Java_To_Runtime(meth),
             post_call_FPU);
  ins_pipe(pipe_slow);
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets,
             FFree_Float_Stack_All,
             Java_To_Runtime(meth),
             Verify_FPU_For_Leaf,
             post_call_FPU);
  ins_pipe(pipe_slow);
%}

// Leaf runtime call matched from CallLeafNoFP; unlike CallLeafDirect its
// encoding has no FFree_Float_Stack_All, Verify_FPU_For_Leaf or
// post_call_FPU steps.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe(pipe_jmp);
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop);
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode(OpcP, RegOpc(jump_target));
  ins_pipe(pipe_jmp);
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match(TailJump jump_target ex_oop);
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  // The pop (enc_pop_rdx) is emitted first, then the indirect jump.
  ins_encode(enc_pop_rdx,
             OpcP, RegOpc(jump_target));
  ins_pipe(pipe_jmp);
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException(eAXRegP ex_oop)
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// FastLock variant selected when the current compilation uses RTM.
// tmp, scr, cx1 and cx2 are scratch registers (TEMP); box is used and killed.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// FastLock variant selected when the current compilation does not use RTM.
// No cx1/cx2 registers or RTM counters are passed (noreg / NULL / false).
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

// FastUnlock: tmp is a scratch register (TEMP); box is used and killed.
// The compilation's use_rtm() setting is forwarded to fast_unlock.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
// Variant selected when SafepointMechanism uses the global polling page.
instruct safePoint_poll(eFlagsReg cr) %{
  predicate(SafepointMechanism::uses_global_page_poll());
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}

// Variant selected when SafepointMechanism uses the thread-local poll; the
// polling address is supplied in the 'poll' register operand.
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  predicate(SafepointMechanism::uses_thread_local_poll());
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    // Remember where the poll instruction starts so its first opcode byte
    // can be checked after it has been emitted.
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.