//
// Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

// The 4th argument is the x86 hardware encoding used in ModRM/reg fields.
reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI,   ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
// Relocation format selectors for 32-bit immediates and displacements.
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

// Shorthand used by the assembler-emitting helpers below.
#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Aligns 'adr' down to a 16-byte boundary inside the caller's buffer and
// stores the 128-bit pattern {lo, hi} there; returns the aligned address.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// NOTE: each pointer below aliases an aligned slot inside fp_signmask_pool;
// the extra pair of slots absorbs the alignment slack.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted before a call for FPU/AVX state resets:
// 6 for an fldcw when the method runs in 24-bit FP mode, plus 3 for a
// vzeroupper when the CPU supports it.  Must match what the call
// encodings actually emit, since ret_addr_offset() below adds it in.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size in bytes of the FFree-Float-Stack-All stub prefix; set when that
// code is emitted (-1 until then, checked by the assert below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return SafepointMechanism::uses_thread_local_poll();
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModRM byte assembled from its three fields (mod, reg, r/m).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition code OR-ed into it.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  // Embedded oops must not be scavengable unless ScavengeRootsInCode tracks them.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop(d32))), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + [ESP+disp] addressing, choosing the 8-bit or 32-bit
// displacement encoding.  ESP as base always requires a SIB byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte, mod=01 (disp8)
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);                      // Displacement
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte, mod=10 (disp32)
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);                      // Displacement
  }
}

//   rRegI ereg, memory mem) %{    // emit_reg_mem
// Encode a full ModRM(+SIB)(+disp) memory operand.  index == 0x4 means
// "no index" (0x4 is ESP's encoding, which cannot be an index register);
// base == -1 requests absolute (disp32-only) addressing.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}


// Emit a 32-bit reg-to-reg MOV (0x8B); identical encodings elide the move.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Normalize EFLAGS after a comiss/ucomiss against NaN so the result
// reads as 'less than' (see bit table below).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst':
// -1 (less or unordered), 0 (equal), 1 (greater).
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog for -XX:+PrintOptoAssembly; must mirror the
// code shapes produced by MachPrologNode::emit/verified_entry below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


// Emit the method prolog; the real frame-building work is delegated to
// MacroAssembler::verified_entry.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog; must mirror MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

// Emit the method epilog: AVX/FPU state restore, frame teardown,
// EBP pop, reserved-stack check, and the return-point safepoint poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize (imm32 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize (imm8 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // pop EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    if (SafepointMechanism::uses_thread_local_poll()) {
      // Thread-local poll: load the poll page address out of the current
      // thread and test against it.
      Register pollReg = as_Register(EBX_enc);
      MacroAssembler masm(&cbuf);
      masm.get_thread(pollReg);
      masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
      masm.relocate(relocInfo::poll_return_type);
      masm.testl(rax, Address(pollReg, 0));
    } else {
      // Global poll page: TEST EAX, [polling_page] with a poll_return reloc.
      cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
      emit_opcode(cbuf,0x85);
      emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
      emit_d32(cbuf, (intptr_t)os::get_polling_page());
    }
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Register classes used by the spill-copy helpers below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
// Classify an allocator register name as int/x87/xmm/stack-slot.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Spill-copy helper with three modes: cbuf != NULL emits the instruction;
// cbuf == NULL && !do_size prints the assembly; in all modes the
// accumulated instruction size (size + opcode+ModRM+SIB + disp) is returned.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Emit/format/size an XMM <-> stack-slot spill move (MOVSS/MOVSD or their
// VEX/EVEX forms).  reg_lo+1 == reg_hi indicates a 64-bit (double) move.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // EVEX can compress a displacement to one byte (disp8*N); query the
  // assembler so the size accounting matches the emitted encoding.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// Emit/format/size an XMM -> XMM register move (MOVSS/MOVSD, or
// MOVAPS/MOVAPD when UseXmmRegToRegMoveAll is set).
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) { // Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Emit/format/size a 32-bit GPR -> XMM move (MOVD).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// Emit/format/size an XMM -> 32-bit GPR move (MOVD).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Emit/format/size a GPR -> GPR move (MOV r32,r/m32; always 2 bytes).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(cbuf ? *cbuf : *cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 register to [ESP + offset].  If the source is not already
// FPR1 (top of stack) it is first duplicated with FLD and stored with a
// popping FSTP; otherwise a non-popping FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // st_op only selects which register column impl_helper uses for the ModRM
  // /digit field: /3 (FSTP, store & pop) vs /2 (FST, store no pop).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);

// Vector stack-slot -> stack-slot copy.  VecS/VecD go through push/pop pairs;
// VecX/VecY/VecZ bounce through xmm0, parking its old value below ESP.
// calc_size mirrors the emitted byte count and is asserted against it.
static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
                                     int dst_offset, uint ireg, outputStream* st) {
  int calc_size = 0;
  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
  switch (ireg) {
  case Op_VecS:
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    break;
  case Op_VecD: {
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    // Second push/pop pair addresses the upper 4 bytes; its displacement may
    // need a different encoding size than the first pair's.
    int tmp_src_offset = src_offset + 4;
    int tmp_dst_offset = dst_offset + 4;
    src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
    dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
    calc_size += 3+src_offset_size + 3+dst_offset_size;
    break;
  }
  case Op_VecX:
  case Op_VecY:
  case Op_VecZ:
    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
    break;
  default:
    ShouldNotReachHere();
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX:
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
    assert(size == calc_size, "incorrect size calculation");
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      // NOTE(review): the listing says "popq" but 32-bit popl is what is
      // emitted above — confirm against x86_64.ad before changing.
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  return calc_size;
}

// Central spill-copy dispatcher.  Classifies source/destination (first and
// second 32-bit halves) and routes to the matching helper.  Serves three
// callers: emit() (cbuf != NULL), format() (cbuf == NULL, !do_size), and
// size() (cbuf == NULL, do_size) — all must agree on byte counts.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Copy the high half first so the low-half pop does not clobber it.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 ); // FLD ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD ); // FSTP ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode  (*cbuf, 0xDD ); // FST ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD ); // FSTP ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // x87 store to the temp slot, then xmm load from it.
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Materialize the address of the lock's stack slot: LEA reg,[ESP+offset],
// using an 8-bit displacement when offset < 128 (must agree with size()).
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7; // opcode + ModRM + SIB + disp32
  }
  else {
    return 4; // opcode + ModRM + SIB + disp8
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: inline-cache check (expected klass in EAX, receiver
// in ECX), padded with NOPs so the verified entry point is patchable.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}

// This is UltraSparc specific, true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 1; }

// No CMOVF/CMOVD with SSE/SSE2
const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Compressed oops/klass pointers are a 64-bit-only feature, so the next four
// queries must never be reached on x86_32.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_oop_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_klass_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Rewrite the memory operand addressed by input 'idx' into its
// *_win95_safeOper variant (presumably to avoid an addressing form unsafe
// for implicit null checks on Win95 — confirm against the operand
// definitions elsewhere in this file).
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt     = 1;                       // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Walk operands until the one owning input edge 'idx' is found.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                                // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise
// here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
         (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL (no divmodL on 32-bit x86)
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL (no divmodL on 32-bit x86)
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    // AND with a constant whose high word is zero clears the high word.
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently four supported interfaces:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.  MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32,0 via the dense 0xB8+rd encoding
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    // normal case special case
    //
    // input : rax,: dividend min_int
    // reg: divisor -1
    //
    // output: rax,: quotient (= rax, idiv reg) min_int
    // rdx: remainder (= rax, irem reg) 0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80 cmp rax,80000000h
    // 0F 85 0B 00 00 00 jne normal_case
    // 33 D2 xor rdx,edx
    // 83 F9 FF cmp rcx,0FFh
    // 0F 84 03 00 00 00 je done
    // normal_case:
    // 99 cdq
    // F7 F9 idiv rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
    // normal_case:
    emit_opcode(cbuf,0x99); // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1720 // Check for 8-bit immediate, and set sign extend bit in opcode 1721 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1722 emit_opcode(cbuf, $primary | 0x02); } 1723 else { // If 32-bit immediate 1724 emit_opcode(cbuf, $primary); 1725 } 1726 // Emit r/m byte with secondary opcode, after primary opcode. 1727 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1728 %} 1729 1730 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1731 // Check for 8-bit immediate, and set sign extend bit in opcode 1732 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1733 $$$emit8$imm$$constant; 1734 } 1735 else { // If 32-bit immediate 1736 // Output immediate 1737 $$$emit32$imm$$constant; 1738 } 1739 %} 1740 1741 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1742 // Emit primary opcode and set sign-extend bit 1743 // Check for 8-bit immediate, and set sign extend bit in opcode 1744 int con = (int)$imm$$constant; // Throw away top bits 1745 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1746 // Emit r/m byte with secondary opcode, after primary opcode. 1747 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1748 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1749 else emit_d32(cbuf,con); 1750 %} 1751 1752 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1753 // Emit primary opcode and set sign-extend bit 1754 // Check for 8-bit immediate, and set sign extend bit in opcode 1755 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1756 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1757 // Emit r/m byte with tertiary opcode, after primary opcode. 
1758 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1759 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1760 else emit_d32(cbuf,con); 1761 %} 1762 1763 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1764 emit_cc(cbuf, $secondary, $dst$$reg ); 1765 %} 1766 1767 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1768 int destlo = $dst$$reg; 1769 int desthi = HIGH_FROM_LOW(destlo); 1770 // bswap lo 1771 emit_opcode(cbuf, 0x0F); 1772 emit_cc(cbuf, 0xC8, destlo); 1773 // bswap hi 1774 emit_opcode(cbuf, 0x0F); 1775 emit_cc(cbuf, 0xC8, desthi); 1776 // xchg lo and hi 1777 emit_opcode(cbuf, 0x87); 1778 emit_rm(cbuf, 0x3, destlo, desthi); 1779 %} 1780 1781 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1782 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1783 %} 1784 1785 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1786 $$$emit8$primary; 1787 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1788 %} 1789 1790 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1791 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1792 emit_d8(cbuf, op >> 8 ); 1793 emit_d8(cbuf, op & 255); 1794 %} 1795 1796 // emulate a CMOV with a conditional branch around a MOV 1797 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1798 // Invert sense of branch from sense of CMOV 1799 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1800 emit_d8( cbuf, $brOffs$$constant ); 1801 %} 1802 1803 enc_class enc_PartialSubtypeCheck( ) %{ 1804 Register Redi = as_Register(EDI_enc); // result register 1805 Register Reax = as_Register(EAX_enc); // super class 1806 Register Recx = as_Register(ECX_enc); // killed 1807 Register Resi = as_Register(ESI_enc); // sub class 1808 Label miss; 1809 1810 MacroAssembler _masm(&cbuf); 1811 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1812 NULL, &miss, 1813 /*set_cond_codes:*/ true); 1814 if ($primary) { 1815 __ xorptr(Redi, Redi); 1816 } 1817 __ bind(miss); 1818 %} 1819 1820 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1821 
MacroAssembler masm(&cbuf); 1822 int start = masm.offset(); 1823 if (UseSSE >= 2) { 1824 if (VerifyFPU) { 1825 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1826 } 1827 } else { 1828 // External c_calling_convention expects the FPU stack to be 'clean'. 1829 // Compiled code leaves it dirty. Do cleanup now. 1830 masm.empty_FPU_stack(); 1831 } 1832 if (sizeof_FFree_Float_Stack_All == -1) { 1833 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1834 } else { 1835 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1836 } 1837 %} 1838 1839 enc_class Verify_FPU_For_Leaf %{ 1840 if( VerifyFPU ) { 1841 MacroAssembler masm(&cbuf); 1842 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1843 } 1844 %} 1845 1846 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1847 // This is the instruction starting address for relocation info. 1848 cbuf.set_insts_mark(); 1849 $$$emit8$primary; 1850 // CALL directly to the runtime 1851 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1852 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1853 1854 if (UseSSE >= 2) { 1855 MacroAssembler _masm(&cbuf); 1856 BasicType rt = tf()->return_type(); 1857 1858 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1859 // A C runtime call where the return value is unused. In SSE2+ 1860 // mode the result needs to be removed from the FPU stack. It's 1861 // likely that this function call could be removed by the 1862 // optimizer if the C function is a pure function. 
1863 __ ffree(0); 1864 } else if (rt == T_FLOAT) { 1865 __ lea(rsp, Address(rsp, -4)); 1866 __ fstp_s(Address(rsp, 0)); 1867 __ movflt(xmm0, Address(rsp, 0)); 1868 __ lea(rsp, Address(rsp, 4)); 1869 } else if (rt == T_DOUBLE) { 1870 __ lea(rsp, Address(rsp, -8)); 1871 __ fstp_d(Address(rsp, 0)); 1872 __ movdbl(xmm0, Address(rsp, 0)); 1873 __ lea(rsp, Address(rsp, 8)); 1874 } 1875 } 1876 %} 1877 1878 enc_class pre_call_resets %{ 1879 // If method sets FPU control word restore it here 1880 debug_only(int off0 = cbuf.insts_size()); 1881 if (ra_->C->in_24_bit_fp_mode()) { 1882 MacroAssembler _masm(&cbuf); 1883 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1884 } 1885 // Clear upper bits of YMM registers when current compiled code uses 1886 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1887 MacroAssembler _masm(&cbuf); 1888 __ vzeroupper(); 1889 debug_only(int off1 = cbuf.insts_size()); 1890 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1891 %} 1892 1893 enc_class post_call_FPU %{ 1894 // If method sets FPU control word do it here also 1895 if (Compile::current()->in_24_bit_fp_mode()) { 1896 MacroAssembler masm(&cbuf); 1897 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1898 } 1899 %} 1900 1901 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1902 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1903 // who we intended to call. 1904 cbuf.set_insts_mark(); 1905 $$$emit8$primary; 1906 1907 if (!_method) { 1908 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1909 runtime_call_Relocation::spec(), 1910 RELOC_IMM32); 1911 } else { 1912 int method_index = resolved_method_index(cbuf); 1913 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1914 : static_call_Relocation::spec(method_index); 1915 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1916 rspec, RELOC_DISP32); 1917 // Emit stubs for static call. 1918 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1919 if (stub == NULL) { 1920 ciEnv::current()->record_failure("CodeCache is full"); 1921 return; 1922 } 1923 } 1924 %} 1925 1926 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1927 MacroAssembler _masm(&cbuf); 1928 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1929 %} 1930 1931 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1932 int disp = in_bytes(Method::from_compiled_offset()); 1933 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1934 1935 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1936 cbuf.set_insts_mark(); 1937 $$$emit8$primary; 1938 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1939 emit_d8(cbuf, disp); // Displacement 1940 1941 %} 1942 1943 // Following encoding is no longer used, but may be restored if calling 1944 // convention changes significantly. 
  // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  // enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //   // int ic_reg     = Matcher::inline_cache_reg();
  //   // int ic_encode  = Matcher::_regEncode[ic_reg];
  //   // int imo_reg    = Matcher::interpreter_method_oop_reg();
  //   // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //   // // Interpreter expects method_oop in EBX, currently a callee-saved register,
  //   // // so we load it immediately before the call
  //   // emit_opcode(cbuf, 0x8B);  // MOV    imo_reg,ic_reg  # method_oop
  //   // emit_rm(cbuf, 0x03, imo_encode, ic_encode );  // R/M byte
  //
  //   // xor rbp,ebp
  //   emit_opcode(cbuf, 0x33);
  //   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //   // CALL to interpreter.
  //   cbuf.set_insts_mark();
  //   $$$emit8$primary;
  //   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //               runtime_call_Relocation::spec(), RELOC_IMM32 );
  // %}

  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the low 32 bits of a long immediate; a zero half becomes XOR dst,dst.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the high 32 bits of a long immediate; a zero half becomes XOR dst,dst.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // r/m byte pairing the int dst with the high half of the long src
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  enc_class lock_prefix( ) %{
    emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB0);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // 16-bit mode
    emit_opcode(cbuf, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize the ZF-is-clear condition as a 0/1 boolean in res.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Long shift by 1..31: SHLD/SHRD to move bits across the pair, then
  // shift the remaining half.  $tertiary (0xA4 = SHLD) picks the direction.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by 32..63: move hi into lo, shift lo by
  // (cnt-32), and fill hi with the sign (SAR hi,31).
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Long shift by 32..63 with zero fill: move one half across, shift it by
  // (cnt-32), and clear the vacated half.  $secondary picks the direction.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branchless conditional add: p += (p < q) ? y : 0, via SBB borrow mask.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Variable long left shift: hardware shifts mask the count to 5 bits, so a
  // count >= 32 is handled by first moving lo into hi and clearing lo.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Variable long logical right shift; see shift_left_long for the >=32 trick.
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Variable long arithmetic right shift; hi is sign-filled for counts >= 32.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  // Load dst into FPR0 and, if src is not FPR1, rotate src into FPR1 via
  // fincstp/FXCH/fdecstp so a following two-operand FP op sees the right pair.
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Move two XMM doubles onto the x87 stack via a stack temporary.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Move two XMM floats onto the x87 stack via a stack temporary.
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 TOS double into an XMM register, releasing the stack temp.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Pop the x87 TOS float into an XMM register; d8 is the temp size to release.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Move one XMM double onto the x87 stack via a stack temporary.
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Copy an XMM double to x87 TOS using an already-reserved stack temp.
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Rotate src into FPR1 (see Push_Reg_Mod_DPR) so the result can be popped.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy FPU status into EFLAGS and skip ahead when parity is clear.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp  ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf,
0x66 ); // operand-size prefix for 16-bit immediate 2667 emit_opcode( cbuf, 0xA9 ); 2668 emit_d16 ( cbuf, 0x0400 ); 2669 // // // This sequence works, but stalls for 12-16 cycles on PPro 2670 // // test rax,0x0400 2671 // emit_opcode( cbuf, 0xA9 ); 2672 // emit_d32 ( cbuf, 0x00000400 ); 2673 // 2674 // jz exit (no unordered comparison) 2675 emit_opcode( cbuf, 0x74 ); 2676 emit_d8 ( cbuf, 0x02 ); 2677 // mov ah,1 - treat as LT case (set carry flag) 2678 emit_opcode( cbuf, 0xB4 ); 2679 emit_d8 ( cbuf, 0x01 ); 2680 // sahf 2681 emit_opcode( cbuf, 0x9E); 2682 %} 2683 2684 enc_class cmpF_P6_fixup() %{ 2685 // Fixup the integer flags in case comparison involved a NaN 2686 // 2687 // JNP exit (no unordered comparison, P-flag is set by NaN) 2688 emit_opcode( cbuf, 0x7B ); 2689 emit_d8 ( cbuf, 0x03 ); 2690 // MOV AH,1 - treat as LT case (set carry flag) 2691 emit_opcode( cbuf, 0xB4 ); 2692 emit_d8 ( cbuf, 0x01 ); 2693 // SAHF 2694 emit_opcode( cbuf, 0x9E); 2695 // NOP // target for branch to avoid branch to branch 2696 emit_opcode( cbuf, 0x90); 2697 %} 2698 2699 // fnstsw_ax(); 2700 // sahf(); 2701 // movl(dst, nan_result); 2702 // jcc(Assembler::parity, exit); 2703 // movl(dst, less_result); 2704 // jcc(Assembler::below, exit); 2705 // movl(dst, equal_result); 2706 // jcc(Assembler::equal, exit); 2707 // movl(dst, greater_result); 2708 2709 // less_result = 1; 2710 // greater_result = -1; 2711 // equal_result = 0; 2712 // nan_result = -1; 2713 2714 enc_class CmpF_Result(rRegI dst) %{ 2715 // fnstsw_ax(); 2716 emit_opcode( cbuf, 0xDF); 2717 emit_opcode( cbuf, 0xE0); 2718 // sahf 2719 emit_opcode( cbuf, 0x9E); 2720 // movl(dst, nan_result); 2721 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2722 emit_d32( cbuf, -1 ); 2723 // jcc(Assembler::parity, exit); 2724 emit_opcode( cbuf, 0x7A ); 2725 emit_d8 ( cbuf, 0x13 ); 2726 // movl(dst, less_result); 2727 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2728 emit_d32( cbuf, -1 ); 2729 // jcc(Assembler::below, exit); 2730 emit_opcode( cbuf, 0x72 ); 
2731 emit_d8 ( cbuf, 0x0C ); 2732 // movl(dst, equal_result); 2733 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2734 emit_d32( cbuf, 0 ); 2735 // jcc(Assembler::equal, exit); 2736 emit_opcode( cbuf, 0x74 ); 2737 emit_d8 ( cbuf, 0x05 ); 2738 // movl(dst, greater_result); 2739 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2740 emit_d32( cbuf, 1 ); 2741 %} 2742 2743 2744 // Compare the longs and set flags 2745 // BROKEN! Do Not use as-is 2746 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2747 // CMP $src1.hi,$src2.hi 2748 emit_opcode( cbuf, 0x3B ); 2749 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2750 // JNE,s done 2751 emit_opcode(cbuf,0x75); 2752 emit_d8(cbuf, 2 ); 2753 // CMP $src1.lo,$src2.lo 2754 emit_opcode( cbuf, 0x3B ); 2755 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2756 // done: 2757 %} 2758 2759 enc_class convert_int_long( regL dst, rRegI src ) %{ 2760 // mov $dst.lo,$src 2761 int dst_encoding = $dst$$reg; 2762 int src_encoding = $src$$reg; 2763 encode_Copy( cbuf, dst_encoding , src_encoding ); 2764 // mov $dst.hi,$src 2765 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2766 // sar $dst.hi,31 2767 emit_opcode( cbuf, 0xC1 ); 2768 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2769 emit_d8(cbuf, 0x1F ); 2770 %} 2771 2772 enc_class convert_long_double( eRegL src ) %{ 2773 // push $src.hi 2774 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2775 // push $src.lo 2776 emit_opcode(cbuf, 0x50+$src$$reg ); 2777 // fild 64-bits at [SP] 2778 emit_opcode(cbuf,0xdf); 2779 emit_d8(cbuf, 0x6C); 2780 emit_d8(cbuf, 0x24); 2781 emit_d8(cbuf, 0x00); 2782 // pop stack 2783 emit_opcode(cbuf, 0x83); // add SP, #8 2784 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2785 emit_d8(cbuf, 0x8); 2786 %} 2787 2788 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2789 // IMUL EDX:EAX,$src1 2790 emit_opcode( cbuf, 0xF7 ); 2791 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2792 // SAR 
    // (cont.) EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  // Widening signed multiply: EDX:EAX = EAX * src.
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  // Widening unsigned multiply: EDX:EAX = EAX * src.
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  // Full 64x64->64 multiply composed from 32-bit multiplies.
  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL    EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp   (old comment said ESI; the operand is the tmp register)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}

  // 64-bit multiply by a small (0..127) constant.
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL    EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp   (old comment said ESI; the operand is the tmp register)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}

  // Push both long operands and call the SharedRuntime::ldiv stub,
  // then pop the four argument words.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf, 0x50+$src1$$reg );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf, 0x50+$src2$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Push both long operands and call the SharedRuntime::lrem stub,
  // then pop the four argument words.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf, 0x50+$src1$$reg );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf, 0x50+$src2$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Set the zero flag for a long: OR the two halves together in 'tmp'.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  // Long equality compare: compare low words; only if equal, compare
  // the high words (short JNE over the second CMP).
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}

  // Signed long compare: CMP the low words, then SBB the high words
  // into a scratch register so EFLAGS reflects the full 64-bit compare.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB    $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}

  // Compare a long against zero: compute 0 - src via CMP/SBB in 'tmp'.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}

  // Sniff, sniff...
// (cont.) smells like Gnu Superoptimizer

  // Negate a long in place: NEG hi, NEG lo, then SBB 0 from hi to
  // propagate the borrow from the low word.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3, $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  // POP EDX.
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}

  // Jump to the shared rethrow stub.
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
                      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
                      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); // 0x80000000 == Intel's corner-case marker
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Convert a double to a long.  Same truncation scheme as DPR2I above,
  // but with a 64-bit FISTP and a two-word corner-case test.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
                      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
                      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x5A);       // POP EDX
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); // 0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);     // Size of slow_call
    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADDP  ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable. Unfortunately, it kills the condition code
  // in the process
  // We current use TESTL [spp],EDI
  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0

  enc_class Safepoint_Poll() %{
    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm (cbuf, 0x0, 0x7, 0x5);
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  %}
%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   | (to get allocators register number
//  G  Owned by    |        |        v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//        |        |        |  3
//        |        +--------+
//        V        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        |  locks |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by      +--------+
//    CALLEE       | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required
size attribute (in bits) 3328 ins_attrib ins_short_branch(0); // Required flag: is this instruction a 3329 // non-matching short branch variant of some 3330 // long branch? 3331 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) 3332 // specifies the alignment that some part of the instruction (not 3333 // necessarily the start) requires. If > 1, a compute_padding() 3334 // function must be provided for the instruction 3335 3336 //----------OPERANDS----------------------------------------------------------- 3337 // Operand definitions must precede instruction definitions for correct parsing 3338 // in the ADLC because operands constitute user defined types which are used in 3339 // instruction definitions. 3340 3341 //----------Simple Operands---------------------------------------------------- 3342 // Immediate Operands 3343 // Integer Immediate 3344 operand immI() %{ 3345 match(ConI); 3346 3347 op_cost(10); 3348 format %{ %} 3349 interface(CONST_INTER); 3350 %} 3351 3352 // Constant for test vs zero 3353 operand immI0() %{ 3354 predicate(n->get_int() == 0); 3355 match(ConI); 3356 3357 op_cost(0); 3358 format %{ %} 3359 interface(CONST_INTER); 3360 %} 3361 3362 // Constant for increment 3363 operand immI1() %{ 3364 predicate(n->get_int() == 1); 3365 match(ConI); 3366 3367 op_cost(0); 3368 format %{ %} 3369 interface(CONST_INTER); 3370 %} 3371 3372 // Constant for decrement 3373 operand immI_M1() %{ 3374 predicate(n->get_int() == -1); 3375 match(ConI); 3376 3377 op_cost(0); 3378 format %{ %} 3379 interface(CONST_INTER); 3380 %} 3381 3382 // Valid scale values for addressing modes 3383 operand immI2() %{ 3384 predicate(0 <= n->get_int() && (n->get_int() <= 3)); 3385 match(ConI); 3386 3387 format %{ %} 3388 interface(CONST_INTER); 3389 %} 3390 3391 operand immI8() %{ 3392 predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); 3393 match(ConI); 3394 3395 op_cost(5); 3396 format %{ %} 3397 interface(CONST_INTER); 3398 %} 3399 3400 
// Int Immediate, 16-bit signed range
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in the int range 1..31
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in the range 32..63 (upper half of a long shift)
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant 1
operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 2
operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 3
operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value fits in a signed 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 path only)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 path)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand
immD0() %{ 3597 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3598 // bug that generates code such that NaNs compare equal to 0.0 AND do not 3599 // compare equal to -0.0. 3600 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 3601 match(ConD); 3602 3603 format %{ %} 3604 interface(CONST_INTER); 3605 %} 3606 3607 // Float Immediate zero 3608 operand immFPR0() %{ 3609 predicate(UseSSE == 0 && n->getf() == 0.0F); 3610 match(ConF); 3611 3612 op_cost(5); 3613 format %{ %} 3614 interface(CONST_INTER); 3615 %} 3616 3617 // Float Immediate one 3618 operand immFPR1() %{ 3619 predicate(UseSSE == 0 && n->getf() == 1.0F); 3620 match(ConF); 3621 3622 op_cost(5); 3623 format %{ %} 3624 interface(CONST_INTER); 3625 %} 3626 3627 // Float Immediate 3628 operand immFPR() %{ 3629 predicate( UseSSE == 0 ); 3630 match(ConF); 3631 3632 op_cost(5); 3633 format %{ %} 3634 interface(CONST_INTER); 3635 %} 3636 3637 // Float Immediate 3638 operand immF() %{ 3639 predicate(UseSSE >= 1); 3640 match(ConF); 3641 3642 op_cost(5); 3643 format %{ %} 3644 interface(CONST_INTER); 3645 %} 3646 3647 // Float Immediate zero. 
Zero and not -0.0 3648 operand immF0() %{ 3649 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 3650 match(ConF); 3651 3652 op_cost(5); 3653 format %{ %} 3654 interface(CONST_INTER); 3655 %} 3656 3657 // Immediates for special shifts (sign extend) 3658 3659 // Constants for increment 3660 operand immI_16() %{ 3661 predicate( n->get_int() == 16 ); 3662 match(ConI); 3663 3664 format %{ %} 3665 interface(CONST_INTER); 3666 %} 3667 3668 operand immI_24() %{ 3669 predicate( n->get_int() == 24 ); 3670 match(ConI); 3671 3672 format %{ %} 3673 interface(CONST_INTER); 3674 %} 3675 3676 // Constant for byte-wide masking 3677 operand immI_255() %{ 3678 predicate( n->get_int() == 255 ); 3679 match(ConI); 3680 3681 format %{ %} 3682 interface(CONST_INTER); 3683 %} 3684 3685 // Constant for short-wide masking 3686 operand immI_65535() %{ 3687 predicate(n->get_int() == 65535); 3688 match(ConI); 3689 3690 format %{ %} 3691 interface(CONST_INTER); 3692 %} 3693 3694 // Register Operands 3695 // Integer Register 3696 operand rRegI() %{ 3697 constraint(ALLOC_IN_RC(int_reg)); 3698 match(RegI); 3699 match(xRegI); 3700 match(eAXRegI); 3701 match(eBXRegI); 3702 match(eCXRegI); 3703 match(eDXRegI); 3704 match(eDIRegI); 3705 match(eSIRegI); 3706 3707 format %{ %} 3708 interface(REG_INTER); 3709 %} 3710 3711 // Subset of Integer Register 3712 operand xRegI(rRegI reg) %{ 3713 constraint(ALLOC_IN_RC(int_x_reg)); 3714 match(reg); 3715 match(eAXRegI); 3716 match(eBXRegI); 3717 match(eCXRegI); 3718 match(eDXRegI); 3719 3720 format %{ %} 3721 interface(REG_INTER); 3722 %} 3723 3724 // Special Registers 3725 operand eAXRegI(xRegI reg) %{ 3726 constraint(ALLOC_IN_RC(eax_reg)); 3727 match(reg); 3728 match(rRegI); 3729 3730 format %{ "EAX" %} 3731 interface(REG_INTER); 3732 %} 3733 3734 // Special Registers 3735 operand eBXRegI(xRegI reg) %{ 3736 constraint(ALLOC_IN_RC(ebx_reg)); 3737 match(reg); 3738 match(rRegI); 3739 3740 format %{ "EBX" %} 3741 interface(REG_INTER); 3742 %} 3743 3744 operand 
eCXRegI(xRegI reg) %{ 3745 constraint(ALLOC_IN_RC(ecx_reg)); 3746 match(reg); 3747 match(rRegI); 3748 3749 format %{ "ECX" %} 3750 interface(REG_INTER); 3751 %} 3752 3753 operand eDXRegI(xRegI reg) %{ 3754 constraint(ALLOC_IN_RC(edx_reg)); 3755 match(reg); 3756 match(rRegI); 3757 3758 format %{ "EDX" %} 3759 interface(REG_INTER); 3760 %} 3761 3762 operand eDIRegI(xRegI reg) %{ 3763 constraint(ALLOC_IN_RC(edi_reg)); 3764 match(reg); 3765 match(rRegI); 3766 3767 format %{ "EDI" %} 3768 interface(REG_INTER); 3769 %} 3770 3771 operand naxRegI() %{ 3772 constraint(ALLOC_IN_RC(nax_reg)); 3773 match(RegI); 3774 match(eCXRegI); 3775 match(eDXRegI); 3776 match(eSIRegI); 3777 match(eDIRegI); 3778 3779 format %{ %} 3780 interface(REG_INTER); 3781 %} 3782 3783 operand nadxRegI() %{ 3784 constraint(ALLOC_IN_RC(nadx_reg)); 3785 match(RegI); 3786 match(eBXRegI); 3787 match(eCXRegI); 3788 match(eSIRegI); 3789 match(eDIRegI); 3790 3791 format %{ %} 3792 interface(REG_INTER); 3793 %} 3794 3795 operand ncxRegI() %{ 3796 constraint(ALLOC_IN_RC(ncx_reg)); 3797 match(RegI); 3798 match(eAXRegI); 3799 match(eDXRegI); 3800 match(eSIRegI); 3801 match(eDIRegI); 3802 3803 format %{ %} 3804 interface(REG_INTER); 3805 %} 3806 3807 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3808 // // 3809 operand eSIRegI(xRegI reg) %{ 3810 constraint(ALLOC_IN_RC(esi_reg)); 3811 match(reg); 3812 match(rRegI); 3813 3814 format %{ "ESI" %} 3815 interface(REG_INTER); 3816 %} 3817 3818 // Pointer Register 3819 operand anyRegP() %{ 3820 constraint(ALLOC_IN_RC(any_reg)); 3821 match(RegP); 3822 match(eAXRegP); 3823 match(eBXRegP); 3824 match(eCXRegP); 3825 match(eDIRegP); 3826 match(eRegP); 3827 3828 format %{ %} 3829 interface(REG_INTER); 3830 %} 3831 3832 operand eRegP() %{ 3833 constraint(ALLOC_IN_RC(int_reg)); 3834 match(RegP); 3835 match(eAXRegP); 3836 match(eBXRegP); 3837 match(eCXRegP); 3838 match(eDIRegP); 3839 3840 format %{ %} 3841 interface(REG_INTER); 3842 %} 3843 3844 // 
On windows95, EBP is not safe to use for implicit null tests. 3845 operand eRegP_no_EBP() %{ 3846 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3847 match(RegP); 3848 match(eAXRegP); 3849 match(eBXRegP); 3850 match(eCXRegP); 3851 match(eDIRegP); 3852 3853 op_cost(100); 3854 format %{ %} 3855 interface(REG_INTER); 3856 %} 3857 3858 operand naxRegP() %{ 3859 constraint(ALLOC_IN_RC(nax_reg)); 3860 match(RegP); 3861 match(eBXRegP); 3862 match(eDXRegP); 3863 match(eCXRegP); 3864 match(eSIRegP); 3865 match(eDIRegP); 3866 3867 format %{ %} 3868 interface(REG_INTER); 3869 %} 3870 3871 operand nabxRegP() %{ 3872 constraint(ALLOC_IN_RC(nabx_reg)); 3873 match(RegP); 3874 match(eCXRegP); 3875 match(eDXRegP); 3876 match(eSIRegP); 3877 match(eDIRegP); 3878 3879 format %{ %} 3880 interface(REG_INTER); 3881 %} 3882 3883 operand pRegP() %{ 3884 constraint(ALLOC_IN_RC(p_reg)); 3885 match(RegP); 3886 match(eBXRegP); 3887 match(eDXRegP); 3888 match(eSIRegP); 3889 match(eDIRegP); 3890 3891 format %{ %} 3892 interface(REG_INTER); 3893 %} 3894 3895 // Special Registers 3896 // Return a pointer value 3897 operand eAXRegP(eRegP reg) %{ 3898 constraint(ALLOC_IN_RC(eax_reg)); 3899 match(reg); 3900 format %{ "EAX" %} 3901 interface(REG_INTER); 3902 %} 3903 3904 // Used in AtomicAdd 3905 operand eBXRegP(eRegP reg) %{ 3906 constraint(ALLOC_IN_RC(ebx_reg)); 3907 match(reg); 3908 format %{ "EBX" %} 3909 interface(REG_INTER); 3910 %} 3911 3912 // Tail-call (interprocedural jump) to interpreter 3913 operand eCXRegP(eRegP reg) %{ 3914 constraint(ALLOC_IN_RC(ecx_reg)); 3915 match(reg); 3916 format %{ "ECX" %} 3917 interface(REG_INTER); 3918 %} 3919 3920 operand eSIRegP(eRegP reg) %{ 3921 constraint(ALLOC_IN_RC(esi_reg)); 3922 match(reg); 3923 format %{ "ESI" %} 3924 interface(REG_INTER); 3925 %} 3926 3927 // Used in rep stosw 3928 operand eDIRegP(eRegP reg) %{ 3929 constraint(ALLOC_IN_RC(edi_reg)); 3930 match(reg); 3931 format %{ "EDI" %} 3932 interface(REG_INTER); 3933 %} 3934 3935 operand eRegL() %{ 
3936 constraint(ALLOC_IN_RC(long_reg)); 3937 match(RegL); 3938 match(eADXRegL); 3939 3940 format %{ %} 3941 interface(REG_INTER); 3942 %} 3943 3944 operand eADXRegL( eRegL reg ) %{ 3945 constraint(ALLOC_IN_RC(eadx_reg)); 3946 match(reg); 3947 3948 format %{ "EDX:EAX" %} 3949 interface(REG_INTER); 3950 %} 3951 3952 operand eBCXRegL( eRegL reg ) %{ 3953 constraint(ALLOC_IN_RC(ebcx_reg)); 3954 match(reg); 3955 3956 format %{ "EBX:ECX" %} 3957 interface(REG_INTER); 3958 %} 3959 3960 // Special case for integer high multiply 3961 operand eADXRegL_low_only() %{ 3962 constraint(ALLOC_IN_RC(eadx_reg)); 3963 match(RegL); 3964 3965 format %{ "EAX" %} 3966 interface(REG_INTER); 3967 %} 3968 3969 // Flags register, used as output of compare instructions 3970 operand eFlagsReg() %{ 3971 constraint(ALLOC_IN_RC(int_flags)); 3972 match(RegFlags); 3973 3974 format %{ "EFLAGS" %} 3975 interface(REG_INTER); 3976 %} 3977 3978 // Flags register, used as output of FLOATING POINT compare instructions 3979 operand eFlagsRegU() %{ 3980 constraint(ALLOC_IN_RC(int_flags)); 3981 match(RegFlags); 3982 3983 format %{ "EFLAGS_U" %} 3984 interface(REG_INTER); 3985 %} 3986 3987 operand eFlagsRegUCF() %{ 3988 constraint(ALLOC_IN_RC(int_flags)); 3989 match(RegFlags); 3990 predicate(false); 3991 3992 format %{ "EFLAGS_U_CF" %} 3993 interface(REG_INTER); 3994 %} 3995 3996 // Condition Code Register used by long compare 3997 operand flagsReg_long_LTGE() %{ 3998 constraint(ALLOC_IN_RC(int_flags)); 3999 match(RegFlags); 4000 format %{ "FLAGS_LTGE" %} 4001 interface(REG_INTER); 4002 %} 4003 operand flagsReg_long_EQNE() %{ 4004 constraint(ALLOC_IN_RC(int_flags)); 4005 match(RegFlags); 4006 format %{ "FLAGS_EQNE" %} 4007 interface(REG_INTER); 4008 %} 4009 operand flagsReg_long_LEGT() %{ 4010 constraint(ALLOC_IN_RC(int_flags)); 4011 match(RegFlags); 4012 format %{ "FLAGS_LEGT" %} 4013 interface(REG_INTER); 4014 %} 4015 4016 // Condition Code Register used by unsigned long compare 4017 operand 
flagsReg_ulong_LTGE() %{ 4018 constraint(ALLOC_IN_RC(int_flags)); 4019 match(RegFlags); 4020 format %{ "FLAGS_U_LTGE" %} 4021 interface(REG_INTER); 4022 %} 4023 operand flagsReg_ulong_EQNE() %{ 4024 constraint(ALLOC_IN_RC(int_flags)); 4025 match(RegFlags); 4026 format %{ "FLAGS_U_EQNE" %} 4027 interface(REG_INTER); 4028 %} 4029 operand flagsReg_ulong_LEGT() %{ 4030 constraint(ALLOC_IN_RC(int_flags)); 4031 match(RegFlags); 4032 format %{ "FLAGS_U_LEGT" %} 4033 interface(REG_INTER); 4034 %} 4035 4036 // Float register operands 4037 operand regDPR() %{ 4038 predicate( UseSSE < 2 ); 4039 constraint(ALLOC_IN_RC(fp_dbl_reg)); 4040 match(RegD); 4041 match(regDPR1); 4042 match(regDPR2); 4043 format %{ %} 4044 interface(REG_INTER); 4045 %} 4046 4047 operand regDPR1(regDPR reg) %{ 4048 predicate( UseSSE < 2 ); 4049 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4050 match(reg); 4051 format %{ "FPR1" %} 4052 interface(REG_INTER); 4053 %} 4054 4055 operand regDPR2(regDPR reg) %{ 4056 predicate( UseSSE < 2 ); 4057 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4058 match(reg); 4059 format %{ "FPR2" %} 4060 interface(REG_INTER); 4061 %} 4062 4063 operand regnotDPR1(regDPR reg) %{ 4064 predicate( UseSSE < 2 ); 4065 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4066 match(reg); 4067 format %{ %} 4068 interface(REG_INTER); 4069 %} 4070 4071 // Float register operands 4072 operand regFPR() %{ 4073 predicate( UseSSE < 2 ); 4074 constraint(ALLOC_IN_RC(fp_flt_reg)); 4075 match(RegF); 4076 match(regFPR1); 4077 format %{ %} 4078 interface(REG_INTER); 4079 %} 4080 4081 // Float register operands 4082 operand regFPR1(regFPR reg) %{ 4083 predicate( UseSSE < 2 ); 4084 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4085 match(reg); 4086 format %{ "FPR1" %} 4087 interface(REG_INTER); 4088 %} 4089 4090 // XMM Float register operands 4091 operand regF() %{ 4092 predicate( UseSSE>=1 ); 4093 constraint(ALLOC_IN_RC(float_reg_legacy)); 4094 match(RegF); 4095 format %{ %} 4096 interface(REG_INTER); 4097 %} 4098 4099 // Float 
register operands 4100 operand vlRegF() %{ 4101 constraint(ALLOC_IN_RC(float_reg_vl)); 4102 match(RegF); 4103 4104 format %{ %} 4105 interface(REG_INTER); 4106 %} 4107 4108 // XMM Double register operands 4109 operand regD() %{ 4110 predicate( UseSSE>=2 ); 4111 constraint(ALLOC_IN_RC(double_reg_legacy)); 4112 match(RegD); 4113 format %{ %} 4114 interface(REG_INTER); 4115 %} 4116 4117 // Double register operands 4118 operand vlRegD() %{ 4119 constraint(ALLOC_IN_RC(double_reg_vl)); 4120 match(RegD); 4121 4122 format %{ %} 4123 interface(REG_INTER); 4124 %} 4125 4126 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM) 4127 // runtime code generation via reg_class_dynamic. 4128 operand vecS() %{ 4129 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 4130 match(VecS); 4131 4132 format %{ %} 4133 interface(REG_INTER); 4134 %} 4135 4136 operand legVecS() %{ 4137 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 4138 match(VecS); 4139 4140 format %{ %} 4141 interface(REG_INTER); 4142 %} 4143 4144 operand vecD() %{ 4145 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 4146 match(VecD); 4147 4148 format %{ %} 4149 interface(REG_INTER); 4150 %} 4151 4152 operand legVecD() %{ 4153 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 4154 match(VecD); 4155 4156 format %{ %} 4157 interface(REG_INTER); 4158 %} 4159 4160 operand vecX() %{ 4161 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 4162 match(VecX); 4163 4164 format %{ %} 4165 interface(REG_INTER); 4166 %} 4167 4168 operand legVecX() %{ 4169 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 4170 match(VecX); 4171 4172 format %{ %} 4173 interface(REG_INTER); 4174 %} 4175 4176 operand vecY() %{ 4177 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 4178 match(VecY); 4179 4180 format %{ %} 4181 interface(REG_INTER); 4182 %} 4183 4184 operand legVecY() %{ 4185 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 4186 match(VecY); 4187 4188 format %{ %} 4189 interface(REG_INTER); 4190 %} 4191 4192 //----------Memory 
Operands---------------------------------------------------- 4193 // Direct Memory Operand 4194 operand direct(immP addr) %{ 4195 match(addr); 4196 4197 format %{ "[$addr]" %} 4198 interface(MEMORY_INTER) %{ 4199 base(0xFFFFFFFF); 4200 index(0x4); 4201 scale(0x0); 4202 disp($addr); 4203 %} 4204 %} 4205 4206 // Indirect Memory Operand 4207 operand indirect(eRegP reg) %{ 4208 constraint(ALLOC_IN_RC(int_reg)); 4209 match(reg); 4210 4211 format %{ "[$reg]" %} 4212 interface(MEMORY_INTER) %{ 4213 base($reg); 4214 index(0x4); 4215 scale(0x0); 4216 disp(0x0); 4217 %} 4218 %} 4219 4220 // Indirect Memory Plus Short Offset Operand 4221 operand indOffset8(eRegP reg, immI8 off) %{ 4222 match(AddP reg off); 4223 4224 format %{ "[$reg + $off]" %} 4225 interface(MEMORY_INTER) %{ 4226 base($reg); 4227 index(0x4); 4228 scale(0x0); 4229 disp($off); 4230 %} 4231 %} 4232 4233 // Indirect Memory Plus Long Offset Operand 4234 operand indOffset32(eRegP reg, immI off) %{ 4235 match(AddP reg off); 4236 4237 format %{ "[$reg + $off]" %} 4238 interface(MEMORY_INTER) %{ 4239 base($reg); 4240 index(0x4); 4241 scale(0x0); 4242 disp($off); 4243 %} 4244 %} 4245 4246 // Indirect Memory Plus Long Offset Operand 4247 operand indOffset32X(rRegI reg, immP off) %{ 4248 match(AddP off reg); 4249 4250 format %{ "[$reg + $off]" %} 4251 interface(MEMORY_INTER) %{ 4252 base($reg); 4253 index(0x4); 4254 scale(0x0); 4255 disp($off); 4256 %} 4257 %} 4258 4259 // Indirect Memory Plus Index Register Plus Offset Operand 4260 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4261 match(AddP (AddP reg ireg) off); 4262 4263 op_cost(10); 4264 format %{"[$reg + $off + $ireg]" %} 4265 interface(MEMORY_INTER) %{ 4266 base($reg); 4267 index($ireg); 4268 scale(0x0); 4269 disp($off); 4270 %} 4271 %} 4272 4273 // Indirect Memory Plus Index Register Plus Offset Operand 4274 operand indIndex(eRegP reg, rRegI ireg) %{ 4275 match(AddP reg ireg); 4276 4277 op_cost(10); 4278 format %{"[$reg + $ireg]" %} 4279 
interface(MEMORY_INTER) %{ 4280 base($reg); 4281 index($ireg); 4282 scale(0x0); 4283 disp(0x0); 4284 %} 4285 %} 4286 4287 // // ------------------------------------------------------------------------- 4288 // // 486 architecture doesn't support "scale * index + offset" with out a base 4289 // // ------------------------------------------------------------------------- 4290 // // Scaled Memory Operands 4291 // // Indirect Memory Times Scale Plus Offset Operand 4292 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4293 // match(AddP off (LShiftI ireg scale)); 4294 // 4295 // op_cost(10); 4296 // format %{"[$off + $ireg << $scale]" %} 4297 // interface(MEMORY_INTER) %{ 4298 // base(0x4); 4299 // index($ireg); 4300 // scale($scale); 4301 // disp($off); 4302 // %} 4303 // %} 4304 4305 // Indirect Memory Times Scale Plus Index Register 4306 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4307 match(AddP reg (LShiftI ireg scale)); 4308 4309 op_cost(10); 4310 format %{"[$reg + $ireg << $scale]" %} 4311 interface(MEMORY_INTER) %{ 4312 base($reg); 4313 index($ireg); 4314 scale($scale); 4315 disp(0x0); 4316 %} 4317 %} 4318 4319 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4320 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4321 match(AddP (AddP reg (LShiftI ireg scale)) off); 4322 4323 op_cost(10); 4324 format %{"[$reg + $off + $ireg << $scale]" %} 4325 interface(MEMORY_INTER) %{ 4326 base($reg); 4327 index($ireg); 4328 scale($scale); 4329 disp($off); 4330 %} 4331 %} 4332 4333 //----------Load Long Memory Operands------------------------------------------ 4334 // The load-long idiom will use it's address expression again after loading 4335 // the first word of the long. If the load-long destination overlaps with 4336 // registers used in the addressing expression, the 2nd half will be loaded 4337 // from a clobbered address. 
Fix this by requiring that load-long use 4338 // address registers that do not overlap with the load-long target. 4339 4340 // load-long support 4341 operand load_long_RegP() %{ 4342 constraint(ALLOC_IN_RC(esi_reg)); 4343 match(RegP); 4344 match(eSIRegP); 4345 op_cost(100); 4346 format %{ %} 4347 interface(REG_INTER); 4348 %} 4349 4350 // Indirect Memory Operand Long 4351 operand load_long_indirect(load_long_RegP reg) %{ 4352 constraint(ALLOC_IN_RC(esi_reg)); 4353 match(reg); 4354 4355 format %{ "[$reg]" %} 4356 interface(MEMORY_INTER) %{ 4357 base($reg); 4358 index(0x4); 4359 scale(0x0); 4360 disp(0x0); 4361 %} 4362 %} 4363 4364 // Indirect Memory Plus Long Offset Operand 4365 operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4366 match(AddP reg off); 4367 4368 format %{ "[$reg + $off]" %} 4369 interface(MEMORY_INTER) %{ 4370 base($reg); 4371 index(0x4); 4372 scale(0x0); 4373 disp($off); 4374 %} 4375 %} 4376 4377 opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4378 4379 4380 //----------Special Memory Operands-------------------------------------------- 4381 // Stack Slot Operand - This operand is used for loading and storing temporary 4382 // values on the stack where a match requires a value to 4383 // flow through memory. 
4384 operand stackSlotP(sRegP reg) %{ 4385 constraint(ALLOC_IN_RC(stack_slots)); 4386 // No match rule because this operand is only generated in matching 4387 format %{ "[$reg]" %} 4388 interface(MEMORY_INTER) %{ 4389 base(0x4); // ESP 4390 index(0x4); // No Index 4391 scale(0x0); // No Scale 4392 disp($reg); // Stack Offset 4393 %} 4394 %} 4395 4396 operand stackSlotI(sRegI reg) %{ 4397 constraint(ALLOC_IN_RC(stack_slots)); 4398 // No match rule because this operand is only generated in matching 4399 format %{ "[$reg]" %} 4400 interface(MEMORY_INTER) %{ 4401 base(0x4); // ESP 4402 index(0x4); // No Index 4403 scale(0x0); // No Scale 4404 disp($reg); // Stack Offset 4405 %} 4406 %} 4407 4408 operand stackSlotF(sRegF reg) %{ 4409 constraint(ALLOC_IN_RC(stack_slots)); 4410 // No match rule because this operand is only generated in matching 4411 format %{ "[$reg]" %} 4412 interface(MEMORY_INTER) %{ 4413 base(0x4); // ESP 4414 index(0x4); // No Index 4415 scale(0x0); // No Scale 4416 disp($reg); // Stack Offset 4417 %} 4418 %} 4419 4420 operand stackSlotD(sRegD reg) %{ 4421 constraint(ALLOC_IN_RC(stack_slots)); 4422 // No match rule because this operand is only generated in matching 4423 format %{ "[$reg]" %} 4424 interface(MEMORY_INTER) %{ 4425 base(0x4); // ESP 4426 index(0x4); // No Index 4427 scale(0x0); // No Scale 4428 disp($reg); // Stack Offset 4429 %} 4430 %} 4431 4432 operand stackSlotL(sRegL reg) %{ 4433 constraint(ALLOC_IN_RC(stack_slots)); 4434 // No match rule because this operand is only generated in matching 4435 format %{ "[$reg]" %} 4436 interface(MEMORY_INTER) %{ 4437 base(0x4); // ESP 4438 index(0x4); // No Index 4439 scale(0x0); // No Scale 4440 disp($reg); // Stack Offset 4441 %} 4442 %} 4443 4444 //----------Memory Operands - Win95 Implicit Null Variants---------------- 4445 // Indirect Memory Operand 4446 operand indirect_win95_safe(eRegP_no_EBP reg) 4447 %{ 4448 constraint(ALLOC_IN_RC(int_reg)); 4449 match(reg); 4450 4451 op_cost(100); 4452 
format %{ "[$reg]" %} 4453 interface(MEMORY_INTER) %{ 4454 base($reg); 4455 index(0x4); 4456 scale(0x0); 4457 disp(0x0); 4458 %} 4459 %} 4460 4461 // Indirect Memory Plus Short Offset Operand 4462 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off) 4463 %{ 4464 match(AddP reg off); 4465 4466 op_cost(100); 4467 format %{ "[$reg + $off]" %} 4468 interface(MEMORY_INTER) %{ 4469 base($reg); 4470 index(0x4); 4471 scale(0x0); 4472 disp($off); 4473 %} 4474 %} 4475 4476 // Indirect Memory Plus Long Offset Operand 4477 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off) 4478 %{ 4479 match(AddP reg off); 4480 4481 op_cost(100); 4482 format %{ "[$reg + $off]" %} 4483 interface(MEMORY_INTER) %{ 4484 base($reg); 4485 index(0x4); 4486 scale(0x0); 4487 disp($off); 4488 %} 4489 %} 4490 4491 // Indirect Memory Plus Index Register Plus Offset Operand 4492 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off) 4493 %{ 4494 match(AddP (AddP reg ireg) off); 4495 4496 op_cost(100); 4497 format %{"[$reg + $off + $ireg]" %} 4498 interface(MEMORY_INTER) %{ 4499 base($reg); 4500 index($ireg); 4501 scale(0x0); 4502 disp($off); 4503 %} 4504 %} 4505 4506 // Indirect Memory Times Scale Plus Index Register 4507 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale) 4508 %{ 4509 match(AddP reg (LShiftI ireg scale)); 4510 4511 op_cost(100); 4512 format %{"[$reg + $ireg << $scale]" %} 4513 interface(MEMORY_INTER) %{ 4514 base($reg); 4515 index($ireg); 4516 scale($scale); 4517 disp(0x0); 4518 %} 4519 %} 4520 4521 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4522 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale) 4523 %{ 4524 match(AddP (AddP reg (LShiftI ireg scale)) off); 4525 4526 op_cost(100); 4527 format %{"[$reg + $off + $ireg << $scale]" %} 4528 interface(MEMORY_INTER) %{ 4529 base($reg); 4530 index($ireg); 4531 scale($scale); 4532 disp($off); 4533 %} 4534 %} 4535 4536 
//----------Conditional Branch Operands----------------------------------------
// Comparison Op - This is the operation of the comparison, and is limited to
// the following set of codes:
// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed compare; encodings are the x86 Jcc condition codes)
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare. Used by FP also, with
// C2 (unordered) turned into GT or LT already. The other bits
// C0 and C3 are turned into Carry & Zero flags.
4570 operand cmpOpU() %{ 4571 match(Bool); 4572 4573 format %{ "" %} 4574 interface(COND_INTER) %{ 4575 equal(0x4, "e"); 4576 not_equal(0x5, "ne"); 4577 less(0x2, "b"); 4578 greater_equal(0x3, "nb"); 4579 less_equal(0x6, "be"); 4580 greater(0x7, "nbe"); 4581 overflow(0x0, "o"); 4582 no_overflow(0x1, "no"); 4583 %} 4584 %} 4585 4586 // Floating comparisons that don't require any fixup for the unordered case 4587 operand cmpOpUCF() %{ 4588 match(Bool); 4589 predicate(n->as_Bool()->_test._test == BoolTest::lt || 4590 n->as_Bool()->_test._test == BoolTest::ge || 4591 n->as_Bool()->_test._test == BoolTest::le || 4592 n->as_Bool()->_test._test == BoolTest::gt); 4593 format %{ "" %} 4594 interface(COND_INTER) %{ 4595 equal(0x4, "e"); 4596 not_equal(0x5, "ne"); 4597 less(0x2, "b"); 4598 greater_equal(0x3, "nb"); 4599 less_equal(0x6, "be"); 4600 greater(0x7, "nbe"); 4601 overflow(0x0, "o"); 4602 no_overflow(0x1, "no"); 4603 %} 4604 %} 4605 4606 4607 // Floating comparisons that can be fixed up with extra conditional jumps 4608 operand cmpOpUCF2() %{ 4609 match(Bool); 4610 predicate(n->as_Bool()->_test._test == BoolTest::ne || 4611 n->as_Bool()->_test._test == BoolTest::eq); 4612 format %{ "" %} 4613 interface(COND_INTER) %{ 4614 equal(0x4, "e"); 4615 not_equal(0x5, "ne"); 4616 less(0x2, "b"); 4617 greater_equal(0x3, "nb"); 4618 less_equal(0x6, "be"); 4619 greater(0x7, "nbe"); 4620 overflow(0x0, "o"); 4621 no_overflow(0x1, "no"); 4622 %} 4623 %} 4624 4625 // Comparison Code for FP conditional move 4626 operand cmpOp_fcmov() %{ 4627 match(Bool); 4628 4629 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 4630 n->as_Bool()->_test._test != BoolTest::no_overflow); 4631 format %{ "" %} 4632 interface(COND_INTER) %{ 4633 equal (0x0C8); 4634 not_equal (0x1C8); 4635 less (0x0C0); 4636 greater_equal(0x1C0); 4637 less_equal (0x0D0); 4638 greater (0x1D0); 4639 overflow(0x0, "o"); // not really supported by the instruction 4640 no_overflow(0x1, "no"); // not really supported 
by the instruction 4641 %} 4642 %} 4643 4644 // Comparison Code used in long compares 4645 operand cmpOp_commute() %{ 4646 match(Bool); 4647 4648 format %{ "" %} 4649 interface(COND_INTER) %{ 4650 equal(0x4, "e"); 4651 not_equal(0x5, "ne"); 4652 less(0xF, "g"); 4653 greater_equal(0xE, "le"); 4654 less_equal(0xD, "ge"); 4655 greater(0xC, "l"); 4656 overflow(0x0, "o"); 4657 no_overflow(0x1, "no"); 4658 %} 4659 %} 4660 4661 // Comparison Code used in unsigned long compares 4662 operand cmpOpU_commute() %{ 4663 match(Bool); 4664 4665 format %{ "" %} 4666 interface(COND_INTER) %{ 4667 equal(0x4, "e"); 4668 not_equal(0x5, "ne"); 4669 less(0x7, "nbe"); 4670 greater_equal(0x6, "be"); 4671 less_equal(0x3, "nb"); 4672 greater(0x2, "b"); 4673 overflow(0x0, "o"); 4674 no_overflow(0x1, "no"); 4675 %} 4676 %} 4677 4678 //----------OPERAND CLASSES---------------------------------------------------- 4679 // Operand Classes are groups of operands that are used as to simplify 4680 // instruction definitions by not requiring the AD writer to specify separate 4681 // instructions for every form of operand when the instruction accepts 4682 // multiple operand types with the same basic encoding and format. The classic 4683 // case of this is memory operands. 4684 4685 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, 4686 indIndex, indIndexScale, indIndexScaleOffset); 4687 4688 // Long memory operations are encoded in 2 instructions and a +4 offset. 4689 // This means some kind of offset is always required and you cannot use 4690 // an oop as the offset (done when working on static globals). 4691 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, 4692 indIndex, indIndexScale, indIndexScaleOffset); 4693 4694 4695 //----------PIPELINE----------------------------------------------------------- 4696 // Rules which define the behavior of the target architectures pipeline. 
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
// Or: _mem if it requires the big decoder and a memory unit.
4740 4741 // Integer ALU reg operation 4742 pipe_class ialu_reg(rRegI dst) %{ 4743 single_instruction; 4744 dst : S4(write); 4745 dst : S3(read); 4746 DECODE : S0; // any decoder 4747 ALU : S3; // any alu 4748 %} 4749 4750 // Long ALU reg operation 4751 pipe_class ialu_reg_long(eRegL dst) %{ 4752 instruction_count(2); 4753 dst : S4(write); 4754 dst : S3(read); 4755 DECODE : S0(2); // any 2 decoders 4756 ALU : S3(2); // both alus 4757 %} 4758 4759 // Integer ALU reg operation using big decoder 4760 pipe_class ialu_reg_fat(rRegI dst) %{ 4761 single_instruction; 4762 dst : S4(write); 4763 dst : S3(read); 4764 D0 : S0; // big decoder only 4765 ALU : S3; // any alu 4766 %} 4767 4768 // Long ALU reg operation using big decoder 4769 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4770 instruction_count(2); 4771 dst : S4(write); 4772 dst : S3(read); 4773 D0 : S0(2); // big decoder only; twice 4774 ALU : S3(2); // any 2 alus 4775 %} 4776 4777 // Integer ALU reg-reg operation 4778 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4779 single_instruction; 4780 dst : S4(write); 4781 src : S3(read); 4782 DECODE : S0; // any decoder 4783 ALU : S3; // any alu 4784 %} 4785 4786 // Long ALU reg-reg operation 4787 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4788 instruction_count(2); 4789 dst : S4(write); 4790 src : S3(read); 4791 DECODE : S0(2); // any 2 decoders 4792 ALU : S3(2); // both alus 4793 %} 4794 4795 // Integer ALU reg-reg operation 4796 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4797 single_instruction; 4798 dst : S4(write); 4799 src : S3(read); 4800 D0 : S0; // big decoder only 4801 ALU : S3; // any alu 4802 %} 4803 4804 // Long ALU reg-reg operation 4805 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4806 instruction_count(2); 4807 dst : S4(write); 4808 src : S3(read); 4809 D0 : S0(2); // big decoder only; twice 4810 ALU : S3(2); // both alus 4811 %} 4812 4813 // Integer ALU reg-mem operation 4814 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4815 single_instruction; 4816 dst : S5(write); 4817 mem : S3(read); 4818 D0 : S0; // big decoder only 4819 ALU : S4; // any alu 4820 MEM : S3; // any mem 4821 %} 4822 4823 // Long ALU reg-mem operation 4824 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4825 instruction_count(2); 4826 dst : S5(write); 4827 mem : S3(read); 4828 D0 : S0(2); // big decoder only; twice 4829 ALU : S4(2); // any 2 alus 4830 MEM : S3(2); // both mems 4831 %} 4832 4833 // Integer mem operation (prefetch) 4834 pipe_class ialu_mem(memory mem) 4835 %{ 4836 single_instruction; 4837 mem : S3(read); 4838 D0 : S0; // big decoder only 4839 MEM : S3; // any mem 4840 %} 4841 4842 // Integer Store to Memory 4843 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4844 single_instruction; 4845 mem : S3(read); 4846 src : S5(read); 4847 D0 : S0; // big decoder only 4848 ALU : S4; // any alu 4849 MEM : S3; 4850 %} 4851 4852 // Long Store to Memory 4853 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4854 instruction_count(2); 4855 mem : S3(read); 4856 src : S5(read); 4857 D0 : S0(2); // big decoder only; twice 4858 ALU : S4(2); // any 2 alus 4859 MEM : S3(2); // Both mems 4860 %} 4861 4862 // Integer Store to Memory 4863 pipe_class ialu_mem_imm(memory mem) %{ 4864 single_instruction; 4865 mem : S3(read); 4866 D0 : S0; // big decoder only 4867 ALU : S4; // any alu 4868 MEM : S3; 4869 %} 4870 4871 // Integer ALU0 reg-reg operation 4872 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4873 single_instruction; 4874 dst : S4(write); 4875 src : S3(read); 4876 D0 : S0; // Big decoder only 4877 ALU0 : S3; // only alu0 4878 %} 4879 4880 // Integer ALU0 reg-mem operation 4881 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4882 single_instruction; 4883 dst : S5(write); 4884 mem : S3(read); 4885 D0 : S0; // big decoder only 4886 ALU0 : S4; // ALU0 only 4887 MEM : S3; // any mem 4888 %} 4889 4890 // Integer ALU reg-reg operation 4891 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4892 single_instruction; 4893 cr : S4(write); 4894 src1 : S3(read); 4895 src2 : S3(read); 4896 DECODE : S0; // any decoder 4897 ALU : S3; // any alu 4898 %} 4899 4900 // Integer ALU reg-imm operation 4901 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4902 single_instruction; 4903 cr : S4(write); 4904 src1 : S3(read); 4905 DECODE : S0; // any decoder 4906 ALU : S3; // any alu 4907 %} 4908 4909 // Integer ALU reg-mem operation 4910 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4911 single_instruction; 4912 cr : S4(write); 4913 src1 : S3(read); 4914 src2 : S3(read); 4915 D0 : S0; // big decoder only 4916 ALU : S4; // any alu 4917 MEM : S3; 4918 %} 4919 4920 // Conditional move reg-reg 4921 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4922 instruction_count(4); 4923 y : S4(read); 4924 q : S3(read); 4925 p : S3(read); 4926 DECODE : S0(4); // any decoder 4927 %} 4928 4929 // Conditional move reg-reg 4930 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4931 single_instruction; 4932 dst : S4(write); 4933 src : S3(read); 4934 cr : S3(read); 4935 DECODE : S0; // any decoder 4936 %} 4937 4938 // Conditional move reg-mem 4939 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4940 single_instruction; 4941 dst : S4(write); 4942 src : S3(read); 4943 cr : S3(read); 4944 DECODE : S0; // any decoder 4945 MEM : S3; 4946 %} 4947 4948 // Conditional move reg-reg long 4949 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4950 single_instruction; 4951 dst : S4(write); 4952 src : S3(read); 4953 cr : S3(read); 4954 DECODE : S0(2); // any 2 decoders 4955 %} 4956 4957 // Conditional move double reg-reg 4958 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4959 single_instruction; 4960 dst : S4(write); 4961 src : S3(read); 4962 cr : S3(read); 4963 DECODE : S0; // any decoder 4964 %} 4965 4966 // Float reg-reg operation 4967 pipe_class fpu_reg(regDPR 
dst) %{ 4968 instruction_count(2); 4969 dst : S3(read); 4970 DECODE : S0(2); // any 2 decoders 4971 FPU : S3; 4972 %} 4973 4974 // Float reg-reg operation 4975 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4976 instruction_count(2); 4977 dst : S4(write); 4978 src : S3(read); 4979 DECODE : S0(2); // any 2 decoders 4980 FPU : S3; 4981 %} 4982 4983 // Float reg-reg operation 4984 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4985 instruction_count(3); 4986 dst : S4(write); 4987 src1 : S3(read); 4988 src2 : S3(read); 4989 DECODE : S0(3); // any 3 decoders 4990 FPU : S3(2); 4991 %} 4992 4993 // Float reg-reg operation 4994 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 4995 instruction_count(4); 4996 dst : S4(write); 4997 src1 : S3(read); 4998 src2 : S3(read); 4999 src3 : S3(read); 5000 DECODE : S0(4); // any 3 decoders 5001 FPU : S3(2); 5002 %} 5003 5004 // Float reg-reg operation 5005 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 5006 instruction_count(4); 5007 dst : S4(write); 5008 src1 : S3(read); 5009 src2 : S3(read); 5010 src3 : S3(read); 5011 DECODE : S1(3); // any 3 decoders 5012 D0 : S0; // Big decoder only 5013 FPU : S3(2); 5014 MEM : S3; 5015 %} 5016 5017 // Float reg-mem operation 5018 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 5019 instruction_count(2); 5020 dst : S5(write); 5021 mem : S3(read); 5022 D0 : S0; // big decoder only 5023 DECODE : S1; // any decoder for FPU POP 5024 FPU : S4; 5025 MEM : S3; // any mem 5026 %} 5027 5028 // Float reg-mem operation 5029 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 5030 instruction_count(3); 5031 dst : S5(write); 5032 src1 : S3(read); 5033 mem : S3(read); 5034 D0 : S0; // big decoder only 5035 DECODE : S1(2); // any decoder for FPU POP 5036 FPU : S4; 5037 MEM : S3; // any mem 5038 %} 5039 5040 // Float mem-reg operation 5041 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 5042 
instruction_count(2); 5043 src : S5(read); 5044 mem : S3(read); 5045 DECODE : S0; // any decoder for FPU PUSH 5046 D0 : S1; // big decoder only 5047 FPU : S4; 5048 MEM : S3; // any mem 5049 %} 5050 5051 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 5052 instruction_count(3); 5053 src1 : S3(read); 5054 src2 : S3(read); 5055 mem : S3(read); 5056 DECODE : S0(2); // any decoder for FPU PUSH 5057 D0 : S1; // big decoder only 5058 FPU : S4; 5059 MEM : S3; // any mem 5060 %} 5061 5062 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 5063 instruction_count(3); 5064 src1 : S3(read); 5065 src2 : S3(read); 5066 mem : S4(read); 5067 DECODE : S0; // any decoder for FPU PUSH 5068 D0 : S0(2); // big decoder only 5069 FPU : S4; 5070 MEM : S3(2); // any mem 5071 %} 5072 5073 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 5074 instruction_count(2); 5075 src1 : S3(read); 5076 dst : S4(read); 5077 D0 : S0(2); // big decoder only 5078 MEM : S3(2); // any mem 5079 %} 5080 5081 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 5082 instruction_count(3); 5083 src1 : S3(read); 5084 src2 : S3(read); 5085 dst : S4(read); 5086 D0 : S0(3); // big decoder only 5087 FPU : S4; 5088 MEM : S3(3); // any mem 5089 %} 5090 5091 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 5092 instruction_count(3); 5093 src1 : S4(read); 5094 mem : S4(read); 5095 DECODE : S0; // any decoder for FPU PUSH 5096 D0 : S0(2); // big decoder only 5097 FPU : S4; 5098 MEM : S3(2); // any mem 5099 %} 5100 5101 // Float load constant 5102 pipe_class fpu_reg_con(regDPR dst) %{ 5103 instruction_count(2); 5104 dst : S5(write); 5105 D0 : S0; // big decoder only for the load 5106 DECODE : S1; // any decoder for FPU POP 5107 FPU : S4; 5108 MEM : S3; // any mem 5109 %} 5110 5111 // Float load constant 5112 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5113 instruction_count(3); 5114 dst : S5(write); 5115 src : S3(read); 5116 D0 : S0; // big decoder only for 
the load 5117 DECODE : S1(2); // any decoder for FPU POP 5118 FPU : S4; 5119 MEM : S3; // any mem 5120 %} 5121 5122 // UnConditional branch 5123 pipe_class pipe_jmp( label labl ) %{ 5124 single_instruction; 5125 BR : S3; 5126 %} 5127 5128 // Conditional branch 5129 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5130 single_instruction; 5131 cr : S1(read); 5132 BR : S3; 5133 %} 5134 5135 // Allocation idiom 5136 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5137 instruction_count(1); force_serialization; 5138 fixed_latency(6); 5139 heap_ptr : S3(read); 5140 DECODE : S0(3); 5141 D0 : S2; 5142 MEM : S3; 5143 ALU : S3(2); 5144 dst : S5(write); 5145 BR : S5; 5146 %} 5147 5148 // Generic big/slow expanded idiom 5149 pipe_class pipe_slow( ) %{ 5150 instruction_count(10); multiple_bundles; force_serialization; 5151 fixed_latency(100); 5152 D0 : S0(2); 5153 MEM : S3(2); 5154 %} 5155 5156 // The real do-nothing guy 5157 pipe_class empty( ) %{ 5158 instruction_count(0); 5159 %} 5160 5161 // Define the class for the Nop node 5162 define %{ 5163 MachNop = empty; 5164 %} 5165 5166 %} 5167 5168 //----------INSTRUCTIONS------------------------------------------------------- 5169 // 5170 // match -- States which machine-independent subtree may be replaced 5171 // by this instruction. 5172 // ins_cost -- The estimated cost of this instruction is used by instruction 5173 // selection to identify a minimum cost tree of machine 5174 // instructions that matches a tree of machine-independent 5175 // instructions. 5176 // format -- A string providing the disassembly for this instruction. 5177 // The value of an instruction's operand may be inserted 5178 // by referring to it with a '$' prefix. 5179 // opcode -- Three instruction opcodes may be provided. These are referred 5180 // to within an encode class as $primary, $secondary, and $tertiary 5181 // respectively. 
//              The primary opcode is commonly used to
//              indicate the type of machine instruction, while secondary
//              and tertiary are often used for prefix options or addressing
//              modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//              name must have been defined in an 'enc_class' specification
//              in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  // Swap bytes within each 32-bit half, then exchange the halves.
  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  // BSWAP moves the 16 interesting bits to the top; the logical shift
  // brings them back down zero-extended.
  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  // Same as the unsigned variant, but the arithmetic shift sign-extends.
  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
      "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    // BSR leaves dst undefined when src == 0 (ZF set); force dst to -1 in
    // that case so NEG + ADD 31 yields 32. Otherwise BSR returns the index
    // of the highest set bit i, and 31 - i is the leading-zero count.
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    // LZCNT sets CF when its source is zero, so CF clear means the high
    // word already supplied the answer; otherwise count the low word and
    // add 32 for the all-zero high word.
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
      "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
      "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
 ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    // Find the highest set bit's 0..63 index (or -1 when the whole long is
    // zero), then 63 - index gives the leading-zero count.
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    // BSF leaves dst undefined when src == 0 (ZF set); substitute 32.
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    // TZCNT sets CF when its source is zero: CF clear means the low word
    // supplied the answer; otherwise count the high word and add 32.
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
      "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    // Low word first; if it is zero, count the high word and add 32
    // (an all-zero long ends up as 32 + 32 = 64).
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  // Sum the popcounts of the two 32-bit halves of the long.
  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Build the two word addresses by hand: base address for the low word,
    // disp + 4 for the high word of the in-memory long.
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    // movsbl already sign-extended the byte through bit 31 of dst.lo, so an
    // arithmetic shift of the copy by only 7 still produces all sign bits.
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 mask bits matter: the zero-extending load already
    // cleared bits 8..31.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    // (x << 24) >> 24 of a loaded short is just the sign-extended low byte.
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    // Same trick as loadB2L: a shift by 15 suffices because the upper 17
    // bits of dst.lo are already all sign bits after movswl.
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  // With a 0xFF mask the 16-bit load collapses to a single byte load.
  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 mask bits matter after the zero-extending load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  // A 31-bit mask guarantees the masked int is non-negative, so the high
  // word of the long is simply zero.
  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads; NOT atomic — the atomic variants below handle
    // require_atomic_access() loads.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
// Atomic 64-bit load via the x87 FPU (pre-SSE2 path): FILD/FISTP moves the
// full 64 bits indivisibly through the FP stack into the stack slot.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic 64-bit load via an XMM temporary when SSE2 is available.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As above, but delivers the result directly into a GPR pair:
// low word via MOVD, high word by shifting the XMM temp right 32 bits.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 path, used when SSE2 is not available)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  // MOVSD clears the upper half of the XMM register; only used when that
  // behavior is desired (UseXmmLoadAndClearUpper).
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 path)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR is shorter/cheaper than MOV imm32 but writes EFLAGS, hence KILL cr.
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load a 64-bit constant as two 32-bit immediate moves.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// Load a float constant from the constant table into an XMM register.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// Zeroing via XORPS avoids a constant-table load.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// Load a double constant from the constant table into an XMM register.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load a long stack slot as two 32-bit loads (low word then high word).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

// No-SSE fallback: emits nothing (size(0)) when PREFETCHW is not selected.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
// xRegI restricts src to the byte-addressable registers.
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  // 0x66 operand-size prefix turns the 32-bit MOV into a 16-bit store.
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// ConvL2I folded into the store: only the low 32 bits are written.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 atomic store: stack slot -> XMM temp -> single 64-bit MOVSD store.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    // CMP probes the target address so a null pointer faults here,
    // before the (non-faulting-point) FP store below.
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 atomic store from a GPR pair: pack lo/hi words into one XMM register
// with PUNPCKLDQ, then store 64 bits at once.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
// Store a pointer immediate (see the comment above: nulls / constant oops
// that need no card-mark barrier).
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87 path; source must already be on top of the FP stack)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Double
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store Float (x87 path)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// ConvD2F folded into the store: FST_S narrows double to float.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
// Store a float immediate as its 32-bit pattern directly to memory.
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

// Acquire barrier: no instruction needed on x86 (loads are not reordered
// with other loads), so this emits nothing (size(0)).
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier; emitted as a locked add to the stack (KILL cr).
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  // A following instruction already provides the StoreLoad effect.
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// CastX2P is a no-op: src and dst are both constrained to EAX.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// Branch-around emulation for CPUs without CMOV support.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Carry-flag-only unsigned compare flavor; expands to the unsigned cmov.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}
// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Carry-flag-only unsigned compare flavor; expands to the unsigned cmov.
instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
// x87 FCMOV; destination is constrained to the FP top-of-stack (regDPR1).
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// So signed compares are emulated with a conditional branch around the move.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed-compare conditional move of x87 float: branch-around-MOV, same
// technique as fcmovDPR_regS above (FCMOV only supports unsigned conditions).
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOV    $dst,$src\t# float\n"
      "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2: XMM registers have no CMOVcc, so emit an
// inverted conditional branch around a MOVSS.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSS  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2 — double (MOVSD) version of the above.
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSS  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// CF-only variant delegates to the unsigned float version.
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// CF-only variant delegates to the unsigned double version.
instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Conditional move of a long: two CMOVcc instructions, one for each
// 32-bit half of the register pair.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags version of cmovL_reg.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// CF-only variant delegates to the unsigned long version.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
// Register + register ADD (opcode 03 /r); clobbers EFLAGS.
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD    $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Register + immediate ADD; OpcSErm/Con8or32 picks the sign-extended
// 8-bit form when the immediate fits.
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD    $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of constant 1 as a one-byte INC (40+r); only when UseIncDec allows
// the partial-flags INC/DEC forms.
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC    $dst" %}
  opcode(0x40); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: no flags clobbered, dst need not equal src0.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA    $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer+offset version of the LEA add.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add of constant -1 as a one-byte DEC (48+r); gated on UseIncDec.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC    $dst" %}
  opcode(0x48); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Pointer + int register add (plain ADD).
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD    $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer + immediate add.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD    $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// reg += load: ADD reg, [mem] (opcode 03 /r).
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD    $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write: [mem] += reg (opcode 01 /r).
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD    $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD    $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// [mem] += 1 as INC dword [mem] (FF /0).
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC    $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// [mem] += -1 as DEC dword [mem] (FF /1).
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC    $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// CheckCastPP is a type-system-only node: no code is emitted (size 0).
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastPP likewise emits nothing.
instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastII likewise emits nothing; zero cost.
instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}


// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
// LOCK CMPXCHG [heap_top_ptr],newval with expected value in EAX.
// Result is consumed through the flags (cr), not a register.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG   EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high order word of the new value to store but
    //       our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS via LOCK CMPXCHG8B; res <- 1 on success, 0 on failure
// (materialized from ZF by enc_flags_ne_to_boolean). Requires CX8.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV    $res,0\n\t"
            "JNE,s  fail\n\t"
            "MOV    $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS; expected value in EAX, new value in ECX.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV    $res,0\n\t"
            "JNE,s  fail\n\t"
            "MOV    $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte CAS (CMPXCHGB).
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV    $res,0\n\t"
            "JNE,s  fail\n\t"
            "MOV    $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Short CAS (CMPXCHGW).
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV    $res,0\n\t"
            "JNE,s  fail\n\t"
            "MOV    $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int CAS (CMPXCHG).
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV    $res,0\n\t"
            "JNE,s  fail\n\t"
            "MOV    $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange variants return the witnessed value in oldval
// (EDX:EAX / EAX) rather than a success boolean.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddB whose result is unused: a plain LOCK ADDB suffices
// (cheaper than XADD since no value needs to come back).
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB  [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
// Atomic fetch-and-add of a byte via LOCK XADDB; newval receives the
// prior memory contents. xRegI restricts to byte-addressable registers.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB  [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddS with unused result: plain LOCK ADD (word form).
instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS  [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add of a short via LOCK XADDW.
instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS  [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI with unused result: plain LOCK ADDL.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL  [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add of an int via LOCK XADDL.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL  [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
// Atomic exchange of a byte. XCHG with a memory operand is implicitly
// locked on x86, so no LOCK prefix is emitted and flags are untouched.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB  $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange of a short.
instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW  $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange of an int.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL  $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange of a pointer (32-bit, so XCHGL).
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL  $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB    $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Register - immediate; sign-extended 8-bit form used when it fits.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB    $dst,$src" %}
  opcode(0x81,0x05);  /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// reg -= load: SUB reg, [mem].
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB    $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write: [mem] -= reg.
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB    $dst,$src" %}
  opcode(0x29);  /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB    $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// 0 - dst matched as a single NEG.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG    $dst" %}
  opcode(0xF7,0x03);  // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL   $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL   $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit-representable long constant into EAX only; helper for
// the multiply-high patterns below, which never need the high word.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV    EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
//  (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Only fires when the multiplier is a compile-time long constant that
  // fits in a signed 32-bit int (so the one-word IMUL is valid).
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL   EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Same constant-multiplier restriction as mulI_imm_high; here the
  // shift count is 32..63, so an extra SAR on EDX is needed.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL   EDX:EAX,$src1\n\t"
            "SAR    EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL   $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL   $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea:  long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL   $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned 32x32->64 multiply (operands zero-extended via the AndL mask).
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL    $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV    $tmp,$src.lo\n\t"
            "IMUL   $tmp,EDX\n\t"
            "MOV    EDX,$src.hi\n\t"
            "IMUL   EDX,EAX\n\t"
            "ADD    $tmp,EDX\n\t"
            "MUL    EDX:EAX,$src.lo\n\t"
            "ADD    EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV    $tmp,$src.hi\n\t"
            "IMUL   $tmp,EAX\n\t"
            "MUL    EDX:EAX,$src.lo\n\t"
            "ADD    EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV    $tmp,$src.lo\n\t"
            "IMUL   $tmp,EDX\n\t"
            "MUL    EDX:EAX,$src.lo\n\t"
            "ADD    EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL   $tmp,EDX,$src\n\t"
            "MOV    EDX,$src\n\t"
            "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD    EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// Special-cases min_jint / -1 (which would trap with IDIV) to yield
// quotient min_jint, remainder 0, per Java semantics.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP    EAX,0x80000000\n\t"
            "JNE,s  normal\n\t"
            "XOR    EDX,EDX\n\t"
            "CMP    ECX,-1\n\t"
            "JE,s   done\n"
    "normal: CDQ\n\t"
            "IDIV   $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// Done out-of-line via the SharedRuntime::ldiv runtime stub.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH   $src1.hi\n\t"
            "PUSH   $src1.lo\n\t"
            "PUSH   $src2.hi\n\t"
            "PUSH   $src2.lo\n\t"
            "CALL   SharedRuntime::ldiv\n\t"
            "ADD    ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
// One IDIV produces quotient in EAX and remainder in EDX.
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP    EAX,0x80000000\n\t"
            "JNE,s  normal\n\t"
            "XOR    EDX,EDX\n\t"
            "CMP    ECX,-1\n\t"
            "JE,s   done\n"
    "normal: CDQ\n\t"
            "IDIV   $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
// Remainder lands in EDX after CDQ/IDIV.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV   $div" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// Done out-of-line via the SharedRuntime::lrem runtime stub.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH   $src1.hi\n\t"
            "PUSH   $src1.lo\n\t"
            "PUSH   $src2.hi\n\t"
            "PUSH   $src2.lo\n\t"
            "CALL   SharedRuntime::lrem\n\t"
            "ADD    ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
// Signed 64/32 divide by a constant, inlined via two unsigned 32-bit
// DIVs; sign-handling is done with LNEG before/after as needed.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR    $tmp2,$tmp2\n\t"
            "CMP    $tmp,EDX\n\t"
            "JA,s   fast\n\t"
            "MOV    $tmp2,EAX\n\t"
            "MOV    EAX,EDX\n\t"
            "MOV    EDX,0\n\t"
            "JLE,s  pos\n\t"
            "LNEG   EAX : $tmp2\n\t"
            "DIV    $tmp # unsigned division\n\t"
            "XCHG   EAX,$tmp2\n\t"
            "DIV    $tmp\n\t"
            "LNEG   $tmp2 : EAX\n\t"
            "JMP,s  done\n"
    "pos:\n\t"
            "DIV    $tmp\n\t"
            "XCHG   EAX,$tmp2\n"
    "fast:\n\t"
            "DIV    $tmp\n"
    "done:\n\t"
            "MOV    EDX,$tmp2\n\t"
            "NEG    EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
// NOTE(review): definition continues past the end of this chunk; body
// below is the visible portion only.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP    $tmp,EDX\n\t"
            "JA,s   fast\n\t"
            "MOV    $tmp2,EAX\n\t"
            "MOV    EAX,EDX\n\t"
            "MOV    EDX,0\n\t"
            "JLE,s  pos\n\t"
            "LNEG   EAX : $tmp2\n\t"
            "DIV    $tmp # unsigned division\n\t"
            "MOV    EAX,$tmp2\n\t"
            "DIV    $tmp\n\t"
            "NEG    EDX\n\t"
            "JMP,s  done\n"
    "pos:\n\t"
            "DIV    $tmp\n\t"
            "MOV    EAX,$tmp2\n"
    "fast:\n\t"
            "DIV    $tmp\n"
    "done:\n\t"
            "MOV    EAX,EDX\n\t"
            "SAR    EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ?
con : -con; 8057 Label Lfast, Lpos, Ldone; 8058 8059 __ movl($tmp$$Register, pcon); 8060 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 8061 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 8062 8063 __ movl($tmp2$$Register, $dst$$Register); // save 8064 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 8065 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 8066 __ jccb(Assembler::lessEqual, Lpos); // result is positive 8067 8068 // Negative dividend. 8069 // convert value to positive to use unsigned division 8070 __ lneg($dst$$Register, $tmp2$$Register); 8071 __ divl($tmp$$Register); 8072 __ movl($dst$$Register, $tmp2$$Register); 8073 __ divl($tmp$$Register); 8074 // revert remainder back to negative 8075 __ negl(HIGH_FROM_LOW($dst$$Register)); 8076 __ jmpb(Ldone); 8077 8078 __ bind(Lpos); 8079 __ divl($tmp$$Register); 8080 __ movl($dst$$Register, $tmp2$$Register); 8081 8082 __ bind(Lfast); 8083 // fast path: src is positive 8084 __ divl($tmp$$Register); 8085 8086 __ bind(Ldone); 8087 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 8088 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 8089 8090 %} 8091 ins_pipe( pipe_slow ); 8092 %} 8093 8094 // Integer Shift Instructions 8095 // Shift Left by one 8096 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8097 match(Set dst (LShiftI dst shift)); 8098 effect(KILL cr); 8099 8100 size(2); 8101 format %{ "SHL $dst,$shift" %} 8102 opcode(0xD1, 0x4); /* D1 /4 */ 8103 ins_encode( OpcP, RegOpc( dst ) ); 8104 ins_pipe( ialu_reg ); 8105 %} 8106 8107 // Shift Left by 8-bit immediate 8108 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8109 match(Set dst (LShiftI dst shift)); 8110 effect(KILL cr); 8111 8112 size(3); 8113 format %{ "SHL $dst,$shift" %} 8114 opcode(0xC1, 0x4); /* C1 /4 ib */ 8115 ins_encode( RegOpcImm( dst, shift) ); 8116 ins_pipe( ialu_reg ); 8117 %} 8118 8119 // Shift Left by variable 8120 instruct 
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8121 match(Set dst (LShiftI dst shift)); 8122 effect(KILL cr); 8123 8124 size(2); 8125 format %{ "SHL $dst,$shift" %} 8126 opcode(0xD3, 0x4); /* D3 /4 */ 8127 ins_encode( OpcP, RegOpc( dst ) ); 8128 ins_pipe( ialu_reg_reg ); 8129 %} 8130 8131 // Arithmetic shift right by one 8132 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8133 match(Set dst (RShiftI dst shift)); 8134 effect(KILL cr); 8135 8136 size(2); 8137 format %{ "SAR $dst,$shift" %} 8138 opcode(0xD1, 0x7); /* D1 /7 */ 8139 ins_encode( OpcP, RegOpc( dst ) ); 8140 ins_pipe( ialu_reg ); 8141 %} 8142 8143 // Arithmetic shift right by one 8144 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{ 8145 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 8146 effect(KILL cr); 8147 format %{ "SAR $dst,$shift" %} 8148 opcode(0xD1, 0x7); /* D1 /7 */ 8149 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 8150 ins_pipe( ialu_mem_imm ); 8151 %} 8152 8153 // Arithmetic Shift Right by 8-bit immediate 8154 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8155 match(Set dst (RShiftI dst shift)); 8156 effect(KILL cr); 8157 8158 size(3); 8159 format %{ "SAR $dst,$shift" %} 8160 opcode(0xC1, 0x7); /* C1 /7 ib */ 8161 ins_encode( RegOpcImm( dst, shift ) ); 8162 ins_pipe( ialu_mem_imm ); 8163 %} 8164 8165 // Arithmetic Shift Right by 8-bit immediate 8166 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 8167 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 8168 effect(KILL cr); 8169 8170 format %{ "SAR $dst,$shift" %} 8171 opcode(0xC1, 0x7); /* C1 /7 ib */ 8172 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 8173 ins_pipe( ialu_mem_imm ); 8174 %} 8175 8176 // Arithmetic Shift Right by variable 8177 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8178 match(Set dst (RShiftI dst shift)); 8179 effect(KILL cr); 8180 8181 size(2); 8182 format %{ "SAR $dst,$shift" %} 8183 
opcode(0xD3, 0x7); /* D3 /7 */ 8184 ins_encode( OpcP, RegOpc( dst ) ); 8185 ins_pipe( ialu_reg_reg ); 8186 %} 8187 8188 // Logical shift right by one 8189 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8190 match(Set dst (URShiftI dst shift)); 8191 effect(KILL cr); 8192 8193 size(2); 8194 format %{ "SHR $dst,$shift" %} 8195 opcode(0xD1, 0x5); /* D1 /5 */ 8196 ins_encode( OpcP, RegOpc( dst ) ); 8197 ins_pipe( ialu_reg ); 8198 %} 8199 8200 // Logical Shift Right by 8-bit immediate 8201 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8202 match(Set dst (URShiftI dst shift)); 8203 effect(KILL cr); 8204 8205 size(3); 8206 format %{ "SHR $dst,$shift" %} 8207 opcode(0xC1, 0x5); /* C1 /5 ib */ 8208 ins_encode( RegOpcImm( dst, shift) ); 8209 ins_pipe( ialu_reg ); 8210 %} 8211 8212 8213 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 8214 // This idiom is used by the compiler for the i2b bytecode. 8215 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 8216 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 8217 8218 size(3); 8219 format %{ "MOVSX $dst,$src :8" %} 8220 ins_encode %{ 8221 __ movsbl($dst$$Register, $src$$Register); 8222 %} 8223 ins_pipe(ialu_reg_reg); 8224 %} 8225 8226 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 8227 // This idiom is used by the compiler the i2s bytecode. 
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    // Shift-left-16 then shift-right-16 is exactly a sign-extending word move.
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: matches the (~src1) & src2 idiom ((src1 ^ -1) & src2).
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: matches the (-src) & src "isolate lowest set bit" idiom.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: matches the (src - 1) ^ src "mask up to lowest set bit" idiom.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: matches the (src - 1) & src "reset lowest set bit" idiom.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// These three have no match rule: they are expand-only building blocks used
// by the rolI_* match rules below.
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// The predicate requires the two shift counts to sum to 32 (mod 32), i.e.
// the expression really is a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
// Expand-only building blocks for the rorI_* match rules below.
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR 32bit by one once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// NOT does not touch the flags, so no KILL cr is needed here.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Expand-only helper: plain register copy (no match rule).
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Expand-only helper: with dst holding a copy of src (see convI2B),
// NEG sets CF iff the value was non-zero, and ADC then computes
// -x + x + CF, i.e. 0 or 1.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Expand-only helper: pointer variant of movI_nocopy.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Expand-only helper: pointer variant of ci2b (same NEG/ADC trick).
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// Produces -1 if p < q (signed), else 0, via SETlt + NEG.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    Label done;  // NOTE(review): declared but never bound or jumped to here
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: the sign bit replicated via arithmetic shift.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These produce only the flags result (Set cr ...); the arithmetic value is
// either discarded or recomputed by the matched consumer.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtraction overflow check can use CMP, which leaves op1 untouched.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 0 - x overflow check via NEG (clobbers op2).
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    // Three-operand IMUL into a scratch register so op1 survives.
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
8979 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8980 match(Set dst (SubL dst src)); 8981 effect(KILL cr); 8982 ins_cost(200); 8983 format %{ "SUB $dst.lo,$src.lo\n\t" 8984 "SBB $dst.hi,$src.hi" %} 8985 opcode(0x2B, 0x1B); 8986 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8987 ins_pipe( ialu_reg_reg_long ); 8988 %} 8989 8990 // Subtract Long Register with Immediate 8991 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8992 match(Set dst (SubL dst src)); 8993 effect(KILL cr); 8994 format %{ "SUB $dst.lo,$src.lo\n\t" 8995 "SBB $dst.hi,$src.hi" %} 8996 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8997 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8998 ins_pipe( ialu_reg_long ); 8999 %} 9000 9001 // Subtract Long Register with Memory 9002 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9003 match(Set dst (SubL dst (LoadL mem))); 9004 effect(KILL cr); 9005 ins_cost(125); 9006 format %{ "SUB $dst.lo,$mem\n\t" 9007 "SBB $dst.hi,$mem+4" %} 9008 opcode(0x2B, 0x1B); 9009 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9010 ins_pipe( ialu_reg_long_mem ); 9011 %} 9012 9013 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 9014 match(Set dst (SubL zero dst)); 9015 effect(KILL cr); 9016 ins_cost(300); 9017 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 9018 ins_encode( neg_long(dst) ); 9019 ins_pipe( ialu_reg_reg_long ); 9020 %} 9021 9022 // And Long Register with Register 9023 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9024 match(Set dst (AndL dst src)); 9025 effect(KILL cr); 9026 format %{ "AND $dst.lo,$src.lo\n\t" 9027 "AND $dst.hi,$src.hi" %} 9028 opcode(0x23,0x23); 9029 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9030 ins_pipe( ialu_reg_reg_long ); 9031 %} 9032 9033 // And Long Register with Immediate 9034 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9035 match(Set dst (AndL dst src)); 9036 effect(KILL 
cr); 9037 format %{ "AND $dst.lo,$src.lo\n\t" 9038 "AND $dst.hi,$src.hi" %} 9039 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 9040 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9041 ins_pipe( ialu_reg_long ); 9042 %} 9043 9044 // And Long Register with Memory 9045 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9046 match(Set dst (AndL dst (LoadL mem))); 9047 effect(KILL cr); 9048 ins_cost(125); 9049 format %{ "AND $dst.lo,$mem\n\t" 9050 "AND $dst.hi,$mem+4" %} 9051 opcode(0x23, 0x23); 9052 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9053 ins_pipe( ialu_reg_long_mem ); 9054 %} 9055 9056 // BMI1 instructions 9057 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 9058 match(Set dst (AndL (XorL src1 minus_1) src2)); 9059 predicate(UseBMI1Instructions); 9060 effect(KILL cr, TEMP dst); 9061 9062 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 9063 "ANDNL $dst.hi, $src1.hi, $src2.hi" 9064 %} 9065 9066 ins_encode %{ 9067 Register Rdst = $dst$$Register; 9068 Register Rsrc1 = $src1$$Register; 9069 Register Rsrc2 = $src2$$Register; 9070 __ andnl(Rdst, Rsrc1, Rsrc2); 9071 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 9072 %} 9073 ins_pipe(ialu_reg_reg_long); 9074 %} 9075 9076 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 9077 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 9078 predicate(UseBMI1Instructions); 9079 effect(KILL cr, TEMP dst); 9080 9081 ins_cost(125); 9082 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 9083 "ANDNL $dst.hi, $src1.hi, $src2+4" 9084 %} 9085 9086 ins_encode %{ 9087 Register Rdst = $dst$$Register; 9088 Register Rsrc1 = $src1$$Register; 9089 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 9090 9091 __ andnl(Rdst, Rsrc1, $src2$$Address); 9092 __ andnl(HIGH_FROM_LOW(Rdst), 
HIGH_FROM_LOW(Rsrc1), src2_hi); 9093 %} 9094 ins_pipe(ialu_reg_mem); 9095 %} 9096 9097 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 9098 match(Set dst (AndL (SubL imm_zero src) src)); 9099 predicate(UseBMI1Instructions); 9100 effect(KILL cr, TEMP dst); 9101 9102 format %{ "MOVL $dst.hi, 0\n\t" 9103 "BLSIL $dst.lo, $src.lo\n\t" 9104 "JNZ done\n\t" 9105 "BLSIL $dst.hi, $src.hi\n" 9106 "done:" 9107 %} 9108 9109 ins_encode %{ 9110 Label done; 9111 Register Rdst = $dst$$Register; 9112 Register Rsrc = $src$$Register; 9113 __ movl(HIGH_FROM_LOW(Rdst), 0); 9114 __ blsil(Rdst, Rsrc); 9115 __ jccb(Assembler::notZero, done); 9116 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9117 __ bind(done); 9118 %} 9119 ins_pipe(ialu_reg); 9120 %} 9121 9122 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 9123 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 9124 predicate(UseBMI1Instructions); 9125 effect(KILL cr, TEMP dst); 9126 9127 ins_cost(125); 9128 format %{ "MOVL $dst.hi, 0\n\t" 9129 "BLSIL $dst.lo, $src\n\t" 9130 "JNZ done\n\t" 9131 "BLSIL $dst.hi, $src+4\n" 9132 "done:" 9133 %} 9134 9135 ins_encode %{ 9136 Label done; 9137 Register Rdst = $dst$$Register; 9138 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9139 9140 __ movl(HIGH_FROM_LOW(Rdst), 0); 9141 __ blsil(Rdst, $src$$Address); 9142 __ jccb(Assembler::notZero, done); 9143 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 9144 __ bind(done); 9145 %} 9146 ins_pipe(ialu_reg_mem); 9147 %} 9148 9149 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9150 %{ 9151 match(Set dst (XorL (AddL src minus_1) src)); 9152 predicate(UseBMI1Instructions); 9153 effect(KILL cr, TEMP dst); 9154 9155 format %{ "MOVL $dst.hi, 0\n\t" 9156 "BLSMSKL $dst.lo, $src.lo\n\t" 9157 "JNC done\n\t" 9158 "BLSMSKL $dst.hi, $src.hi\n" 9159 "done:" 9160 %} 9161 9162 ins_encode %{ 9163 Label done; 
9164 Register Rdst = $dst$$Register; 9165 Register Rsrc = $src$$Register; 9166 __ movl(HIGH_FROM_LOW(Rdst), 0); 9167 __ blsmskl(Rdst, Rsrc); 9168 __ jccb(Assembler::carryClear, done); 9169 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9170 __ bind(done); 9171 %} 9172 9173 ins_pipe(ialu_reg); 9174 %} 9175 9176 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9177 %{ 9178 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 9179 predicate(UseBMI1Instructions); 9180 effect(KILL cr, TEMP dst); 9181 9182 ins_cost(125); 9183 format %{ "MOVL $dst.hi, 0\n\t" 9184 "BLSMSKL $dst.lo, $src\n\t" 9185 "JNC done\n\t" 9186 "BLSMSKL $dst.hi, $src+4\n" 9187 "done:" 9188 %} 9189 9190 ins_encode %{ 9191 Label done; 9192 Register Rdst = $dst$$Register; 9193 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9194 9195 __ movl(HIGH_FROM_LOW(Rdst), 0); 9196 __ blsmskl(Rdst, $src$$Address); 9197 __ jccb(Assembler::carryClear, done); 9198 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 9199 __ bind(done); 9200 %} 9201 9202 ins_pipe(ialu_reg_mem); 9203 %} 9204 9205 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9206 %{ 9207 match(Set dst (AndL (AddL src minus_1) src) ); 9208 predicate(UseBMI1Instructions); 9209 effect(KILL cr, TEMP dst); 9210 9211 format %{ "MOVL $dst.hi, $src.hi\n\t" 9212 "BLSRL $dst.lo, $src.lo\n\t" 9213 "JNC done\n\t" 9214 "BLSRL $dst.hi, $src.hi\n" 9215 "done:" 9216 %} 9217 9218 ins_encode %{ 9219 Label done; 9220 Register Rdst = $dst$$Register; 9221 Register Rsrc = $src$$Register; 9222 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9223 __ blsrl(Rdst, Rsrc); 9224 __ jccb(Assembler::carryClear, done); 9225 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9226 __ bind(done); 9227 %} 9228 9229 ins_pipe(ialu_reg); 9230 %} 9231 9232 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9233 %{ 9234 match(Set dst (AndL 
(AddL (LoadL src) minus_1) (LoadL src) )); 9235 predicate(UseBMI1Instructions); 9236 effect(KILL cr, TEMP dst); 9237 9238 ins_cost(125); 9239 format %{ "MOVL $dst.hi, $src+4\n\t" 9240 "BLSRL $dst.lo, $src\n\t" 9241 "JNC done\n\t" 9242 "BLSRL $dst.hi, $src+4\n" 9243 "done:" 9244 %} 9245 9246 ins_encode %{ 9247 Label done; 9248 Register Rdst = $dst$$Register; 9249 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9250 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9251 __ blsrl(Rdst, $src$$Address); 9252 __ jccb(Assembler::carryClear, done); 9253 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9254 __ bind(done); 9255 %} 9256 9257 ins_pipe(ialu_reg_mem); 9258 %} 9259 9260 // Or Long Register with Register 9261 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9262 match(Set dst (OrL dst src)); 9263 effect(KILL cr); 9264 format %{ "OR $dst.lo,$src.lo\n\t" 9265 "OR $dst.hi,$src.hi" %} 9266 opcode(0x0B,0x0B); 9267 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9268 ins_pipe( ialu_reg_reg_long ); 9269 %} 9270 9271 // Or Long Register with Immediate 9272 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9273 match(Set dst (OrL dst src)); 9274 effect(KILL cr); 9275 format %{ "OR $dst.lo,$src.lo\n\t" 9276 "OR $dst.hi,$src.hi" %} 9277 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9278 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9279 ins_pipe( ialu_reg_long ); 9280 %} 9281 9282 // Or Long Register with Memory 9283 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9284 match(Set dst (OrL dst (LoadL mem))); 9285 effect(KILL cr); 9286 ins_cost(125); 9287 format %{ "OR $dst.lo,$mem\n\t" 9288 "OR $dst.hi,$mem+4" %} 9289 opcode(0x0B,0x0B); 9290 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9291 ins_pipe( ialu_reg_long_mem ); 9292 %} 9293 9294 // Xor Long Register with Register 9295 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9296 
match(Set dst (XorL dst src)); 9297 effect(KILL cr); 9298 format %{ "XOR $dst.lo,$src.lo\n\t" 9299 "XOR $dst.hi,$src.hi" %} 9300 opcode(0x33,0x33); 9301 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9302 ins_pipe( ialu_reg_reg_long ); 9303 %} 9304 9305 // Xor Long Register with Immediate -1 9306 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9307 match(Set dst (XorL dst imm)); 9308 format %{ "NOT $dst.lo\n\t" 9309 "NOT $dst.hi" %} 9310 ins_encode %{ 9311 __ notl($dst$$Register); 9312 __ notl(HIGH_FROM_LOW($dst$$Register)); 9313 %} 9314 ins_pipe( ialu_reg_long ); 9315 %} 9316 9317 // Xor Long Register with Immediate 9318 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9319 match(Set dst (XorL dst src)); 9320 effect(KILL cr); 9321 format %{ "XOR $dst.lo,$src.lo\n\t" 9322 "XOR $dst.hi,$src.hi" %} 9323 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9324 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9325 ins_pipe( ialu_reg_long ); 9326 %} 9327 9328 // Xor Long Register with Memory 9329 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9330 match(Set dst (XorL dst (LoadL mem))); 9331 effect(KILL cr); 9332 ins_cost(125); 9333 format %{ "XOR $dst.lo,$mem\n\t" 9334 "XOR $dst.hi,$mem+4" %} 9335 opcode(0x33,0x33); 9336 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9337 ins_pipe( ialu_reg_long_mem ); 9338 %} 9339 9340 // Shift Left Long by 1 9341 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9342 predicate(UseNewLongLShift); 9343 match(Set dst (LShiftL dst cnt)); 9344 effect(KILL cr); 9345 ins_cost(100); 9346 format %{ "ADD $dst.lo,$dst.lo\n\t" 9347 "ADC $dst.hi,$dst.hi" %} 9348 ins_encode %{ 9349 __ addl($dst$$Register,$dst$$Register); 9350 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9351 %} 9352 ins_pipe( ialu_reg_long ); 9353 %} 9354 9355 // Shift Left Long by 2 9356 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9357 
predicate(UseNewLongLShift); 9358 match(Set dst (LShiftL dst cnt)); 9359 effect(KILL cr); 9360 ins_cost(100); 9361 format %{ "ADD $dst.lo,$dst.lo\n\t" 9362 "ADC $dst.hi,$dst.hi\n\t" 9363 "ADD $dst.lo,$dst.lo\n\t" 9364 "ADC $dst.hi,$dst.hi" %} 9365 ins_encode %{ 9366 __ addl($dst$$Register,$dst$$Register); 9367 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9368 __ addl($dst$$Register,$dst$$Register); 9369 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9370 %} 9371 ins_pipe( ialu_reg_long ); 9372 %} 9373 9374 // Shift Left Long by 3 9375 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9376 predicate(UseNewLongLShift); 9377 match(Set dst (LShiftL dst cnt)); 9378 effect(KILL cr); 9379 ins_cost(100); 9380 format %{ "ADD $dst.lo,$dst.lo\n\t" 9381 "ADC $dst.hi,$dst.hi\n\t" 9382 "ADD $dst.lo,$dst.lo\n\t" 9383 "ADC $dst.hi,$dst.hi\n\t" 9384 "ADD $dst.lo,$dst.lo\n\t" 9385 "ADC $dst.hi,$dst.hi" %} 9386 ins_encode %{ 9387 __ addl($dst$$Register,$dst$$Register); 9388 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9389 __ addl($dst$$Register,$dst$$Register); 9390 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9391 __ addl($dst$$Register,$dst$$Register); 9392 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9393 %} 9394 ins_pipe( ialu_reg_long ); 9395 %} 9396 9397 // Shift Left Long by 1-31 9398 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9399 match(Set dst (LShiftL dst cnt)); 9400 effect(KILL cr); 9401 ins_cost(200); 9402 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9403 "SHL $dst.lo,$cnt" %} 9404 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9405 ins_encode( move_long_small_shift(dst,cnt) ); 9406 ins_pipe( ialu_reg_long ); 9407 %} 9408 9409 // Shift Left Long by 32-63 9410 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9411 match(Set dst (LShiftL dst cnt)); 9412 effect(KILL cr); 9413 ins_cost(300); 9414 
format %{ "MOV $dst.hi,$dst.lo\n" 9415 "\tSHL $dst.hi,$cnt-32\n" 9416 "\tXOR $dst.lo,$dst.lo" %} 9417 opcode(0xC1, 0x4); /* C1 /4 ib */ 9418 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9419 ins_pipe( ialu_reg_long ); 9420 %} 9421 9422 // Shift Left Long by variable 9423 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9424 match(Set dst (LShiftL dst shift)); 9425 effect(KILL cr); 9426 ins_cost(500+200); 9427 size(17); 9428 format %{ "TEST $shift,32\n\t" 9429 "JEQ,s small\n\t" 9430 "MOV $dst.hi,$dst.lo\n\t" 9431 "XOR $dst.lo,$dst.lo\n" 9432 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9433 "SHL $dst.lo,$shift" %} 9434 ins_encode( shift_left_long( dst, shift ) ); 9435 ins_pipe( pipe_slow ); 9436 %} 9437 9438 // Shift Right Long by 1-31 9439 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9440 match(Set dst (URShiftL dst cnt)); 9441 effect(KILL cr); 9442 ins_cost(200); 9443 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9444 "SHR $dst.hi,$cnt" %} 9445 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9446 ins_encode( move_long_small_shift(dst,cnt) ); 9447 ins_pipe( ialu_reg_long ); 9448 %} 9449 9450 // Shift Right Long by 32-63 9451 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9452 match(Set dst (URShiftL dst cnt)); 9453 effect(KILL cr); 9454 ins_cost(300); 9455 format %{ "MOV $dst.lo,$dst.hi\n" 9456 "\tSHR $dst.lo,$cnt-32\n" 9457 "\tXOR $dst.hi,$dst.hi" %} 9458 opcode(0xC1, 0x5); /* C1 /5 ib */ 9459 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9460 ins_pipe( ialu_reg_long ); 9461 %} 9462 9463 // Shift Right Long by variable 9464 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9465 match(Set dst (URShiftL dst shift)); 9466 effect(KILL cr); 9467 ins_cost(600); 9468 size(17); 9469 format %{ "TEST $shift,32\n\t" 9470 "JEQ,s small\n\t" 9471 "MOV $dst.lo,$dst.hi\n\t" 9472 "XOR $dst.hi,$dst.hi\n" 9473 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9474 "SHR $dst.hi,$shift" %} 9475 ins_encode( 
shift_right_long( dst, shift ) ); 9476 ins_pipe( pipe_slow ); 9477 %} 9478 9479 // Shift Right Long by 1-31 9480 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9481 match(Set dst (RShiftL dst cnt)); 9482 effect(KILL cr); 9483 ins_cost(200); 9484 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9485 "SAR $dst.hi,$cnt" %} 9486 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9487 ins_encode( move_long_small_shift(dst,cnt) ); 9488 ins_pipe( ialu_reg_long ); 9489 %} 9490 9491 // Shift Right Long by 32-63 9492 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9493 match(Set dst (RShiftL dst cnt)); 9494 effect(KILL cr); 9495 ins_cost(300); 9496 format %{ "MOV $dst.lo,$dst.hi\n" 9497 "\tSAR $dst.lo,$cnt-32\n" 9498 "\tSAR $dst.hi,31" %} 9499 opcode(0xC1, 0x7); /* C1 /7 ib */ 9500 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9501 ins_pipe( ialu_reg_long ); 9502 %} 9503 9504 // Shift Right arithmetic Long by variable 9505 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9506 match(Set dst (RShiftL dst shift)); 9507 effect(KILL cr); 9508 ins_cost(600); 9509 size(18); 9510 format %{ "TEST $shift,32\n\t" 9511 "JEQ,s small\n\t" 9512 "MOV $dst.lo,$dst.hi\n\t" 9513 "SAR $dst.hi,31\n" 9514 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9515 "SAR $dst.hi,$shift" %} 9516 ins_encode( shift_right_arith_long( dst, shift ) ); 9517 ins_pipe( pipe_slow ); 9518 %} 9519 9520 9521 //----------Double Instructions------------------------------------------------ 9522 // Double Math 9523 9524 // Compare & branch 9525 9526 // P6 version of float compare, sets condition codes in EFLAGS 9527 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9528 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9529 match(Set cr (CmpD src1 src2)); 9530 effect(KILL rax); 9531 ins_cost(150); 9532 format %{ "FLD $src1\n\t" 9533 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9534 "JNP exit\n\t" 9535 "MOV ah,1 // saw a NaN, set CF\n\t" 9536 
"SAHF\n" 9537 "exit:\tNOP // avoid branch to branch" %} 9538 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9539 ins_encode( Push_Reg_DPR(src1), 9540 OpcP, RegOpc(src2), 9541 cmpF_P6_fixup ); 9542 ins_pipe( pipe_slow ); 9543 %} 9544 9545 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9546 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9547 match(Set cr (CmpD src1 src2)); 9548 ins_cost(150); 9549 format %{ "FLD $src1\n\t" 9550 "FUCOMIP ST,$src2 // P6 instruction" %} 9551 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9552 ins_encode( Push_Reg_DPR(src1), 9553 OpcP, RegOpc(src2)); 9554 ins_pipe( pipe_slow ); 9555 %} 9556 9557 // Compare & branch 9558 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9559 predicate(UseSSE<=1); 9560 match(Set cr (CmpD src1 src2)); 9561 effect(KILL rax); 9562 ins_cost(200); 9563 format %{ "FLD $src1\n\t" 9564 "FCOMp $src2\n\t" 9565 "FNSTSW AX\n\t" 9566 "TEST AX,0x400\n\t" 9567 "JZ,s flags\n\t" 9568 "MOV AH,1\t# unordered treat as LT\n" 9569 "flags:\tSAHF" %} 9570 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9571 ins_encode( Push_Reg_DPR(src1), 9572 OpcP, RegOpc(src2), 9573 fpu_flags); 9574 ins_pipe( pipe_slow ); 9575 %} 9576 9577 // Compare vs zero into -1,0,1 9578 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9579 predicate(UseSSE<=1); 9580 match(Set dst (CmpD3 src1 zero)); 9581 effect(KILL cr, KILL rax); 9582 ins_cost(280); 9583 format %{ "FTSTD $dst,$src1" %} 9584 opcode(0xE4, 0xD9); 9585 ins_encode( Push_Reg_DPR(src1), 9586 OpcS, OpcP, PopFPU, 9587 CmpF_Result(dst)); 9588 ins_pipe( pipe_slow ); 9589 %} 9590 9591 // Compare into -1,0,1 9592 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9593 predicate(UseSSE<=1); 9594 match(Set dst (CmpD3 src1 src2)); 9595 effect(KILL cr, KILL rax); 9596 ins_cost(300); 9597 format %{ "FCMPD $dst,$src1,$src2" %} 9598 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9599 ins_encode( 
Push_Reg_DPR(src1), 9600 OpcP, RegOpc(src2), 9601 CmpF_Result(dst)); 9602 ins_pipe( pipe_slow ); 9603 %} 9604 9605 // float compare and set condition codes in EFLAGS by XMM regs 9606 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9607 predicate(UseSSE>=2); 9608 match(Set cr (CmpD src1 src2)); 9609 ins_cost(145); 9610 format %{ "UCOMISD $src1,$src2\n\t" 9611 "JNP,s exit\n\t" 9612 "PUSHF\t# saw NaN, set CF\n\t" 9613 "AND [rsp], #0xffffff2b\n\t" 9614 "POPF\n" 9615 "exit:" %} 9616 ins_encode %{ 9617 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9618 emit_cmpfp_fixup(_masm); 9619 %} 9620 ins_pipe( pipe_slow ); 9621 %} 9622 9623 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9624 predicate(UseSSE>=2); 9625 match(Set cr (CmpD src1 src2)); 9626 ins_cost(100); 9627 format %{ "UCOMISD $src1,$src2" %} 9628 ins_encode %{ 9629 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9630 %} 9631 ins_pipe( pipe_slow ); 9632 %} 9633 9634 // float compare and set condition codes in EFLAGS by XMM regs 9635 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9636 predicate(UseSSE>=2); 9637 match(Set cr (CmpD src1 (LoadD src2))); 9638 ins_cost(145); 9639 format %{ "UCOMISD $src1,$src2\n\t" 9640 "JNP,s exit\n\t" 9641 "PUSHF\t# saw NaN, set CF\n\t" 9642 "AND [rsp], #0xffffff2b\n\t" 9643 "POPF\n" 9644 "exit:" %} 9645 ins_encode %{ 9646 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9647 emit_cmpfp_fixup(_masm); 9648 %} 9649 ins_pipe( pipe_slow ); 9650 %} 9651 9652 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9653 predicate(UseSSE>=2); 9654 match(Set cr (CmpD src1 (LoadD src2))); 9655 ins_cost(100); 9656 format %{ "UCOMISD $src1,$src2" %} 9657 ins_encode %{ 9658 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9659 %} 9660 ins_pipe( pipe_slow ); 9661 %} 9662 9663 // Compare into -1,0,1 in XMM 9664 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9665 predicate(UseSSE>=2); 9666 match(Set dst (CmpD3 src1 src2)); 
9667 effect(KILL cr); 9668 ins_cost(255); 9669 format %{ "UCOMISD $src1, $src2\n\t" 9670 "MOV $dst, #-1\n\t" 9671 "JP,s done\n\t" 9672 "JB,s done\n\t" 9673 "SETNE $dst\n\t" 9674 "MOVZB $dst, $dst\n" 9675 "done:" %} 9676 ins_encode %{ 9677 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9678 emit_cmpfp3(_masm, $dst$$Register); 9679 %} 9680 ins_pipe( pipe_slow ); 9681 %} 9682 9683 // Compare into -1,0,1 in XMM and memory 9684 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9685 predicate(UseSSE>=2); 9686 match(Set dst (CmpD3 src1 (LoadD src2))); 9687 effect(KILL cr); 9688 ins_cost(275); 9689 format %{ "UCOMISD $src1, $src2\n\t" 9690 "MOV $dst, #-1\n\t" 9691 "JP,s done\n\t" 9692 "JB,s done\n\t" 9693 "SETNE $dst\n\t" 9694 "MOVZB $dst, $dst\n" 9695 "done:" %} 9696 ins_encode %{ 9697 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9698 emit_cmpfp3(_masm, $dst$$Register); 9699 %} 9700 ins_pipe( pipe_slow ); 9701 %} 9702 9703 9704 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9705 predicate (UseSSE <=1); 9706 match(Set dst (SubD dst src)); 9707 9708 format %{ "FLD $src\n\t" 9709 "DSUBp $dst,ST" %} 9710 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9711 ins_cost(150); 9712 ins_encode( Push_Reg_DPR(src), 9713 OpcP, RegOpc(dst) ); 9714 ins_pipe( fpu_reg_reg ); 9715 %} 9716 9717 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9718 predicate (UseSSE <=1); 9719 match(Set dst (RoundDouble (SubD src1 src2))); 9720 ins_cost(250); 9721 9722 format %{ "FLD $src2\n\t" 9723 "DSUB ST,$src1\n\t" 9724 "FSTP_D $dst\t# D-round" %} 9725 opcode(0xD8, 0x5); 9726 ins_encode( Push_Reg_DPR(src2), 9727 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9728 ins_pipe( fpu_mem_reg_reg ); 9729 %} 9730 9731 9732 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9733 predicate (UseSSE <=1); 9734 match(Set dst (SubD dst (LoadD src))); 9735 ins_cost(150); 9736 9737 format %{ "FLD $src\n\t" 9738 "DSUBp $dst,ST" %} 9739 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9740 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9741 OpcP, RegOpc(dst) ); 9742 ins_pipe( fpu_reg_mem ); 9743 %} 9744 9745 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9746 predicate (UseSSE<=1); 9747 match(Set dst (AbsD src)); 9748 ins_cost(100); 9749 format %{ "FABS" %} 9750 opcode(0xE1, 0xD9); 9751 ins_encode( OpcS, OpcP ); 9752 ins_pipe( fpu_reg_reg ); 9753 %} 9754 9755 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9756 predicate(UseSSE<=1); 9757 match(Set dst (NegD src)); 9758 ins_cost(100); 9759 format %{ "FCHS" %} 9760 opcode(0xE0, 0xD9); 9761 ins_encode( OpcS, OpcP ); 9762 ins_pipe( fpu_reg_reg ); 9763 %} 9764 9765 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9766 predicate(UseSSE<=1); 9767 match(Set dst (AddD dst src)); 9768 format %{ "FLD $src\n\t" 9769 "DADD $dst,ST" %} 9770 size(4); 9771 ins_cost(150); 9772 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9773 ins_encode( Push_Reg_DPR(src), 9774 OpcP, RegOpc(dst) ); 9775 ins_pipe( fpu_reg_reg ); 9776 %} 9777 9778 9779 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9780 predicate(UseSSE<=1); 9781 match(Set dst (RoundDouble (AddD src1 src2))); 9782 ins_cost(250); 9783 9784 format %{ "FLD $src2\n\t" 9785 "DADD ST,$src1\n\t" 9786 "FSTP_D $dst\t# D-round" %} 9787 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9788 ins_encode( Push_Reg_DPR(src2), 9789 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9790 ins_pipe( fpu_mem_reg_reg ); 9791 %} 9792 9793 9794 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9795 predicate(UseSSE<=1); 9796 match(Set dst (AddD dst (LoadD src))); 9797 ins_cost(150); 9798 9799 format %{ "FLD $src\n\t" 9800 "DADDp $dst,ST" %} 9801 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9802 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9803 OpcP, RegOpc(dst) ); 9804 ins_pipe( fpu_reg_mem ); 9805 %} 9806 9807 // add-to-memory 9808 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9809 predicate(UseSSE<=1); 9810 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9811 ins_cost(150); 9812 9813 format %{ "FLD_D $dst\n\t" 9814 "DADD ST,$src\n\t" 9815 "FST_D $dst" %} 9816 opcode(0xDD, 0x0); 9817 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9818 Opcode(0xD8), RegOpc(src), 9819 set_instruction_start, 9820 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9821 ins_pipe( fpu_reg_mem ); 9822 %} 9823 9824 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9825 predicate(UseSSE<=1); 9826 match(Set dst (AddD dst con)); 9827 ins_cost(125); 9828 format %{ "FLD1\n\t" 9829 "DADDp $dst,ST" %} 9830 ins_encode %{ 9831 __ fld1(); 9832 __ faddp($dst$$reg); 9833 %} 9834 ins_pipe(fpu_reg); 9835 %} 9836 9837 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9838 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9839 match(Set dst (AddD dst con)); 9840 ins_cost(200); 9841 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9842 "DADDp $dst,ST" %} 9843 ins_encode %{ 9844 __ fld_d($constantaddress($con)); 9845 __ faddp($dst$$reg); 9846 %} 9847 ins_pipe(fpu_reg_mem); 9848 %} 9849 9850 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9851 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9852 match(Set dst (RoundDouble (AddD src con))); 9853 ins_cost(200); 9854 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9855 "DADD ST,$src\n\t" 9856 "FSTP_D $dst\t# D-round" %} 9857 ins_encode %{ 9858 __ fld_d($constantaddress($con)); 9859 __ fadd($src$$reg); 9860 __ fstp_d(Address(rsp, $dst$$disp)); 9861 %} 9862 ins_pipe(fpu_mem_reg_con); 9863 %} 9864 9865 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9866 predicate(UseSSE<=1); 9867 match(Set dst (MulD dst src)); 9868 format %{ "FLD $src\n\t" 9869 "DMULp $dst,ST" %} 9870 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9871 ins_cost(150); 9872 ins_encode( Push_Reg_DPR(src), 9873 OpcP, RegOpc(dst) ); 9874 ins_pipe( 
fpu_reg_reg ); 9875 %} 9876 9877 // Strict FP instruction biases argument before multiply then 9878 // biases result to avoid double rounding of subnormals. 9879 // 9880 // scale arg1 by multiplying arg1 by 2^(-15360) 9881 // load arg2 9882 // multiply scaled arg1 by arg2 9883 // rescale product by 2^(15360) 9884 // 9885 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9886 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9887 match(Set dst (MulD dst src)); 9888 ins_cost(1); // Select this instruction for all strict FP double multiplies 9889 9890 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9891 "DMULp $dst,ST\n\t" 9892 "FLD $src\n\t" 9893 "DMULp $dst,ST\n\t" 9894 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9895 "DMULp $dst,ST\n\t" %} 9896 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9897 ins_encode( strictfp_bias1(dst), 9898 Push_Reg_DPR(src), 9899 OpcP, RegOpc(dst), 9900 strictfp_bias2(dst) ); 9901 ins_pipe( fpu_reg_reg ); 9902 %} 9903 9904 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9905 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9906 match(Set dst (MulD dst con)); 9907 ins_cost(200); 9908 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9909 "DMULp $dst,ST" %} 9910 ins_encode %{ 9911 __ fld_d($constantaddress($con)); 9912 __ fmulp($dst$$reg); 9913 %} 9914 ins_pipe(fpu_reg_mem); 9915 %} 9916 9917 9918 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9919 predicate( UseSSE<=1 ); 9920 match(Set dst (MulD dst (LoadD src))); 9921 ins_cost(200); 9922 format %{ "FLD_D $src\n\t" 9923 "DMULp $dst,ST" %} 9924 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9925 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9926 OpcP, RegOpc(dst) ); 9927 ins_pipe( fpu_reg_mem ); 9928 %} 9929 9930 // 9931 // Cisc-alternate to reg-reg multiply 9932 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9933 predicate( UseSSE<=1 ); 9934 match(Set dst (MulD src (LoadD mem))); 9935 ins_cost(250); 9936 format %{ "FLD_D $mem\n\t" 9937 "DMUL ST,$src\n\t" 9938 "FSTP_D $dst" %} 9939 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9940 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9941 OpcReg_FPR(src), 9942 Pop_Reg_DPR(dst) ); 9943 ins_pipe( fpu_reg_reg_mem ); 9944 %} 9945 9946 9947 // MACRO3 -- addDPR a mulDPR 9948 // This instruction is a '2-address' instruction in that the result goes 9949 // back to src2. This eliminates a move from the macro; possibly the 9950 // register allocator will have to add it back (and maybe not). 9951 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9952 predicate( UseSSE<=1 ); 9953 match(Set src2 (AddD (MulD src0 src1) src2)); 9954 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9955 "DMUL ST,$src1\n\t" 9956 "DADDp $src2,ST" %} 9957 ins_cost(250); 9958 opcode(0xDD); /* LoadD DD /0 */ 9959 ins_encode( Push_Reg_FPR(src0), 9960 FMul_ST_reg(src1), 9961 FAddP_reg_ST(src2) ); 9962 ins_pipe( fpu_reg_reg_reg ); 9963 %} 9964 9965 9966 // MACRO3 -- subDPR a mulDPR 9967 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9968 predicate( UseSSE<=1 ); 9969 match(Set src2 (SubD (MulD src0 src1) src2)); 9970 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9971 "DMUL ST,$src1\n\t" 9972 "DSUBRp $src2,ST" %} 9973 ins_cost(250); 9974 ins_encode( Push_Reg_FPR(src0), 9975 FMul_ST_reg(src1), 9976 Opcode(0xDE), Opc_plus(0xE0,src2)); 9977 ins_pipe( fpu_reg_reg_reg ); 9978 %} 9979 9980 9981 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9982 predicate( UseSSE<=1 ); 9983 match(Set dst (DivD dst src)); 9984 9985 format %{ "FLD $src\n\t" 9986 "FDIVp $dst,ST" %} 9987 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9988 ins_cost(150); 9989 ins_encode( Push_Reg_DPR(src), 9990 OpcP, RegOpc(dst) ); 9991 ins_pipe( fpu_reg_reg ); 9992 %} 9993 9994 // Strict FP instruction biases argument before division then 9995 // biases 
// result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // FIX: the original carried two predicate() clauses (a bare "UseSSE<=1"
  // before match() plus this full one); ADLC takes a single predicate per
  // instruct, and the strict-FP predicate below already implies UseSSE<=1.
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);  // Select this instruction for all strict FP double divides
                // (was the octal literal "01" — same value, clearer spelling)

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Round after divide, for the non-strict case only (the strict case is
// claimed above by strictfp_divDPR_reg's ins_cost(1)).
instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD $src1\n\t"
            "FDIV ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}

// Double-precision remainder on the x87 stack.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE>=2); 10052 match(Set dst (ModD src0 src1)); 10053 effect(KILL rax, KILL cr); 10054 10055 format %{ "SUB ESP,8\t # DMOD\n" 10056 "\tMOVSD [ESP+0],$src1\n" 10057 "\tFLD_D [ESP+0]\n" 10058 "\tMOVSD [ESP+0],$src0\n" 10059 "\tFLD_D [ESP+0]\n" 10060 "loop:\tFPREM\n" 10061 "\tFWAIT\n" 10062 "\tFNSTSW AX\n" 10063 "\tSAHF\n" 10064 "\tJP loop\n" 10065 "\tFSTP_D [ESP+0]\n" 10066 "\tMOVSD $dst,[ESP+0]\n" 10067 "\tADD ESP,8\n" 10068 "\tFSTP ST0\t # Restore FPU Stack" 10069 %} 10070 ins_cost(250); 10071 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 10072 ins_pipe( pipe_slow ); 10073 %} 10074 10075 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 10076 predicate (UseSSE<=1); 10077 match(Set dst(AtanD dst src)); 10078 format %{ "DATA $dst,$src" %} 10079 opcode(0xD9, 0xF3); 10080 ins_encode( Push_Reg_DPR(src), 10081 OpcP, OpcS, RegOpc(dst) ); 10082 ins_pipe( pipe_slow ); 10083 %} 10084 10085 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 10086 predicate (UseSSE>=2); 10087 match(Set dst(AtanD dst src)); 10088 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 10089 format %{ "DATA $dst,$src" %} 10090 opcode(0xD9, 0xF3); 10091 ins_encode( Push_SrcD(src), 10092 OpcP, OpcS, Push_ResultD(dst) ); 10093 ins_pipe( pipe_slow ); 10094 %} 10095 10096 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 10097 predicate (UseSSE<=1); 10098 match(Set dst (SqrtD src)); 10099 format %{ "DSQRT $dst,$src" %} 10100 opcode(0xFA, 0xD9); 10101 ins_encode( Push_Reg_DPR(src), 10102 OpcS, OpcP, Pop_Reg_DPR(dst) ); 10103 ins_pipe( pipe_slow ); 10104 %} 10105 10106 //-------------Float Instructions------------------------------- 10107 // Float Math 10108 10109 // Code for float compare: 10110 // fcompp(); 10111 // fwait(); fnstsw_ax(); 10112 // sahf(); 10113 // movl(dst, unordered_result); 10114 // jcc(Assembler::parity, exit); 10115 // movl(dst, less_result); 10116 // jcc(Assembler::below, exit); 10117 // movl(dst, equal_result); 10118 
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//   exit:

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax); // fixup code stores 1 into AH before SAHF
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same as above but for a carry-flag-only consumer: no NaN fixup needed.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax); // status word is read back through AX
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9); // FTST
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm); // rewrites flags so NaN compares as "less than"
  %}
  ins_pipe( pipe_slow );
%}

// Carry-flag-only variant: no NaN fixup required.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand, carry-flag-only variant.
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Absolute value: operates on the top-of-stack register only (regFPR1).
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Negation: operates on the top-of-stack register only (regFPR1).
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp)); // store-and-pop rounds to single precision
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg); // pop to register: keeps full x87 precision
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// NOTE(review): the format string prints "FSTP_S" although the result is
// popped to a register (Pop_Reg_FPR), not stored single-precision; the
// format text is display-only and does not affect code generation.
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary),
              RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp)); // store-and-pop rounds to single precision
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg); // pop to register: keeps full x87 precision
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95); // priced below the separate load+mul+add sequence

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits.  It is a '2-address'
// instruction in that the result goes back to src2.  This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: no SSE remainder instruction exists, so the operands
// are spilled to the stack and the x87 FPREM loop computes the result.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted.  Please keep it that way!

// Round a float by storing it to a single-precision stack slot.
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round a double by storing it to a double-precision stack slot.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      // Source is not at top of stack: load a copy, then store-and-pop.
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      // Source already at top of stack: store without popping.
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Float-to-double widening on the x87 stack (non-SSE mode).
// NOTE(review): format prints "FST_S" for an F->D conversion; display-only text.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// SSE1 float to x87 double: shuttle the value through the stack.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr ); // slow-path wrapper clobbers EDX and flags
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 is the hardware's "invalid" sentinel (overflow or NaN);
    // only then take the slow path through the runtime wrapper.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    // Truncating (round-toward-zero) control word for the FISTP.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 (EDX:EAX) is the hardware's overflow/NaN sentinel;
    // only then call the runtime wrapper for Java-conformant results.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 is the hardware's overflow/NaN sentinel; only then take
    // the slow path through the runtime wrapper.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // Truncating (round-toward-zero) control word for the FISTP.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 (EDX:EAX) is the overflow/NaN sentinel; only then
    // call the runtime wrapper for Java-conformant results.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Int to x87 double via FILD from a stack slot (non-SSE2 mode).
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// XMM-only int-to-double: MOVD then CVTDQ2PD avoids the GPR->x87 round trip.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Int-in-memory to x87 double: FILD straight from the memory operand.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// i2f on the FPU stack when the input is provably a byte-range value
// (AndI with constant 255): every such int is exactly representable, so no
// rounding store/reload is needed even in 24-bit precision mode.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Alternate i2f: MOVD into XMM then packed CVTDQ2PS (selected by UseXmmI2F).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy into both halves, then arithmetic-shift the
// high half to replicate the sign bit. SAR sets flags, hence KILL cr.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// l2d via the FPU stack (UseSSE<=1): push both halves, FILD the 64-bit int,
// store to the destination stack slot with a D-round.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// l2d into XMM (UseSSE>=2): FILD/FSTP through the stack, then MOVSD the
// rounded result into the XMM destination.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// l2f into XMM (UseSSE>=1): FILD the long, FSTP_S applies the single-precision
// round, then MOVSS into the XMM destination.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// l2f to a stack slot via the FPU (no UseSSE predicate; F-round on the store).
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// l2i is just a copy of the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Raw bit move float->int when the float already lives in a stack slot.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Raw bit move FPU float->int stack slot (UseSSE==0).
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Raw bit move XMM float->int stack slot (UseSSE>=1).
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit move XMM float->GPR directly via MOVD (UseSSE>=2).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit move int->float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// Raw bit move int stack slot->FPU register (UseSSE==0).
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Raw bit move int stack slot->XMM (UseSSE>=1).
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit move GPR->XMM directly via MOVD (UseSSE>=2).
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit move double stack slot->long register pair (two 32-bit loads).
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// Raw bit move FPU double->long stack slot (UseSSE<=1).
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Raw bit move XMM double->long stack slot (UseSSE>=2).
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit move XMM double->long register pair without touching memory:
// MOVD the low word, PSHUFLW to rotate the high word down, MOVD it out.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit move long register pair->double stack slot (two 32-bit stores).
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// Raw bit move long stack slot->FPU double register (UseSSE<=1).
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// Raw bit move long stack slot->XMM, full-width load that clears the upper
// half (preferred when UseXmmLoadAndClearUpper).
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Partial-register variant used when MOVSD loads are dispreferred
// (!UseXmmLoadAndClearUpper); shown as MOVLPD in the format.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit move long register pair->XMM without memory: MOVD both halves into
// XMM regs and interleave them with PUNPCKLDQ. TEMP dst: written before src
// is fully consumed.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large == false: clear_mem emits the short-vs-large runtime dispatch.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

// Large-array variant: the node is statically known large, so clear_mem is
// told to skip the short-array path (final argument true).
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, Latin1/Latin1 (byte[] vs byte[]). Fixed register bindings
// (EDI/ECX/ESI/EDX, result in EAX) match the macro-assembler intrinsic.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, UTF-16/UTF-16 (char[] vs char[]).
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, Latin1 vs UTF-16.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, UTF-16 vs Latin1. Note the operands are deliberately
// swapped in the call (str2/str1) — UL is implemented as the mirrored LU
// comparison; register classes (ESI/EDX vs EDI/ECX) are swapped to match.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}

  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
11806 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11807 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11808 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11809 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11810 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11811 11812 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11813 ins_encode %{ 11814 int icnt2 = (int)$int_cnt2$$constant; 11815 if (icnt2 >= 8) { 11816 // IndexOf for constant substrings with size >= 8 elements 11817 // which don't need to be loaded through stack. 11818 __ string_indexofC8($str1$$Register, $str2$$Register, 11819 $cnt1$$Register, $cnt2$$Register, 11820 icnt2, $result$$Register, 11821 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11822 } else { 11823 // Small strings are loaded through stack if they cross page boundary. 
11824 __ string_indexof($str1$$Register, $str2$$Register, 11825 $cnt1$$Register, $cnt2$$Register, 11826 icnt2, $result$$Register, 11827 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11828 } 11829 %} 11830 ins_pipe( pipe_slow ); 11831 %} 11832 11833 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11834 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11835 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11836 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11837 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11838 11839 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11840 ins_encode %{ 11841 __ string_indexof($str1$$Register, $str2$$Register, 11842 $cnt1$$Register, $cnt2$$Register, 11843 (-1), $result$$Register, 11844 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11845 %} 11846 ins_pipe( pipe_slow ); 11847 %} 11848 11849 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11850 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11851 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11852 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11853 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11854 11855 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11856 ins_encode %{ 11857 __ string_indexof($str1$$Register, $str2$$Register, 11858 $cnt1$$Register, $cnt2$$Register, 11859 (-1), $result$$Register, 11860 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11861 %} 11862 ins_pipe( pipe_slow ); 11863 %} 11864 11865 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11866 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11867 
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Find a single char within a char[] (StrIndexOfChar); requires SSE4.2.
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
// byte[] flavor (AryEq encoding LL).
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals — char[] flavor (AryEq encoding UU).
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// Test whether a byte[] contains any byte with the sign bit set (HasNegatives).
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Signed compare register with immediate.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST reg,reg (no immediate byte needed).
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & con) compared against zero, folded into a single TEST.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7); /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 is computed but never used in this encode block — confirm
    // whether it can be removed.
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a
// relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Unordered-compare branch: the parity flag distinguishes the unordered case,
// so an extra JP is emitted around the main Jcc.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant used when only the flags result is needed (result register is
// clobbered rather than produced).
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short-offset variant of jmpConUCF2 (parity + main condition, both short).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // High halves: signed compare; low halves: unsigned compare (below).
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
12864 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{ 12865 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12866 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 12867 ins_cost(200); 12868 format %{ "CMOV$cmp $dst,$src" %} 12869 opcode(0x0F,0x40); 12870 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12871 ins_pipe( pipe_cmov_reg ); 12872 %} 12873 12874 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{ 12875 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12876 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 12877 ins_cost(250); 12878 format %{ "CMOV$cmp $dst,$src" %} 12879 opcode(0x0F,0x40); 12880 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 12881 ins_pipe( pipe_cmov_mem ); 12882 %} 12883 12884 // Compare 2 longs and CMOVE ints. 
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 form, UseSSE<=1).
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  // FIX: parenthesize the lt/ge disjunction.  '&&' binds tighter than '||',
  // so the old predicate parsed as (UseSSE<=1 && lt) || ge and matched any
  // ge test even when UseSSE>=2, letting this x87 rule compete with the
  // SSE2 rule cmovDD_reg_LTGE below.  The integer cmov rules above show
  // the intended, parenthesized form.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form, UseSSE>=2).
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  // FIX: parenthesized the lt/ge disjunction (see cmovDDPR_reg_LTGE above).
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, UseSSE==0).
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  // FIX: parenthesized the lt/ge disjunction (see cmovDDPR_reg_LTGE above).
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE form, UseSSE>=1).
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  // FIX: parenthesized the lt/ge disjunction (see cmovDDPR_reg_LTGE above).
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);  // scratch: OR of the two halves is zero iff the long is zero
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  // Low halves decide NE immediately; otherwise the high-half compare decides.
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);  // scratch: OR of the two halves is zero iff the long is zero
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  // Low halves decide NE immediately; otherwise the high-half compare decides.
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  // Two CMOVs: one per 32-bit half of the long.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 form, UseSSE<=1).
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  // FIX: parenthesize the eq/ne disjunction.  '&&' binds tighter than '||',
  // so the old predicate parsed as (UseSSE<=1 && eq) || ne and matched any
  // ne test even when UseSSE>=2, letting this x87 rule compete with the
  // SSE2 rule cmovDD_reg_EQNE below.  The integer cmov rules above show
  // the intended, parenthesized form.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form, UseSSE>=2).
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // FIX: parenthesized the eq/ne disjunction (see cmovDDPR_reg_EQNE above).
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, UseSSE==0).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  // FIX: parenthesized the eq/ne disjunction (see cmovDDPR_reg_EQNE above).
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE form, UseSSE>=1).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // FIX: parenthesized the eq/ne disjunction (see cmovDDPR_reg_EQNE above).
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );  // scratch: holds 0 - src, computed via CMP/SBB
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );  // scratch register for the high-word subtract-with-borrow
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only gt/le conditions may consume LEGT-style (operand-swapped) flags.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);  // scratch: holds 0 - src, computed via CMP/SBB
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);  // scratch register for the high-word subtract-with-borrow
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  // Two CMOVs: one per 32-bit half of the long.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  // NOTE(review): the "$src.hi+4" spelling differs from the sibling
  // cmovLL_mem_LTGE/EQNE formats ("$src.hi"); format text only, no
  // effect on the emitted encoding.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 form, UseSSE<=1).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  // FIX: parenthesize the le/gt disjunction.  '&&' binds tighter than '||',
  // so the old predicate parsed as (UseSSE<=1 && le) || gt and matched any
  // gt test even when UseSSE>=2, letting this x87 rule compete with the
  // SSE2 rule cmovDD_reg_LEGT below.  The integer cmov rules above show
  // the intended, parenthesized form.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form, UseSSE>=2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // FIX: parenthesized the le/gt disjunction (see cmovDDPR_reg_LEGT above).
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, UseSSE==0).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  // FIX: parenthesized the le/gt disjunction (see cmovDDPR_reg_LEGT above).
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE form, UseSSE>=1).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // FIX: parenthesized the le/gt disjunction (see cmovDDPR_reg_LEGT above).
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
13294 instruct CallDynamicJavaDirect(method meth) %{ 13295 match(CallDynamicJava); 13296 effect(USE meth); 13297 13298 ins_cost(300); 13299 format %{ "MOV EAX,(oop)-1\n\t" 13300 "CALL,dynamic" %} 13301 opcode(0xE8); /* E8 cd */ 13302 ins_encode( pre_call_resets, 13303 Java_Dynamic_Call( meth ), 13304 call_epilog, 13305 post_call_FPU ); 13306 ins_pipe( pipe_slow ); 13307 ins_alignment(4); 13308 %} 13309 13310 // Call Runtime Instruction 13311 instruct CallRuntimeDirect(method meth) %{ 13312 match(CallRuntime ); 13313 effect(USE meth); 13314 13315 ins_cost(300); 13316 format %{ "CALL,runtime " %} 13317 opcode(0xE8); /* E8 cd */ 13318 // Use FFREEs to clear entries in float stack 13319 ins_encode( pre_call_resets, 13320 FFree_Float_Stack_All, 13321 Java_To_Runtime( meth ), 13322 post_call_FPU ); 13323 ins_pipe( pipe_slow ); 13324 %} 13325 13326 // Call runtime without safepoint 13327 instruct CallLeafDirect(method meth) %{ 13328 match(CallLeaf); 13329 effect(USE meth); 13330 13331 ins_cost(300); 13332 format %{ "CALL_LEAF,runtime " %} 13333 opcode(0xE8); /* E8 cd */ 13334 ins_encode( pre_call_resets, 13335 FFree_Float_Stack_All, 13336 Java_To_Runtime( meth ), 13337 Verify_FPU_For_Leaf, post_call_FPU ); 13338 ins_pipe( pipe_slow ); 13339 %} 13340 13341 instruct CallLeafNoFPDirect(method meth) %{ 13342 match(CallLeafNoFP); 13343 effect(USE meth); 13344 13345 ins_cost(300); 13346 format %{ "CALL_LEAF_NOFP,runtime " %} 13347 opcode(0xE8); /* E8 cd */ 13348 ins_encode(pre_call_resets, Java_To_Runtime(meth)); 13349 ins_pipe( pipe_slow ); 13350 %} 13351 13352 13353 // Return Instruction 13354 // Remove the return address & jump to it. 13355 instruct Ret() %{ 13356 match(Return); 13357 format %{ "RET" %} 13358 opcode(0xC3); 13359 ins_encode(OpcP); 13360 ins_pipe( pipe_jmp ); 13361 %} 13362 13363 // Tail Call; Jump from runtime stub to Java code. 13364 // Also known as an 'interprocedural jump'. 13365 // Target of jump will eventually return to caller. 
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast-lock variant used when Restricted Transactional Memory is enabled.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Fast-lock variant used when RTM is disabled.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  predicate(SafepointMechanism::uses_global_page_poll());
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or. By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$. Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic. It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}

// Thread-local safepoint poll: test against the per-thread poll address.
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  predicate(SafepointMechanism::uses_thread_local_poll());
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    // post_pc is currently unused; the guarantee checks only the opcode byte.
    address post_pc = __ pc();
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...]
//  );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// A load that immediately follows a store to the same stack slot of the
// same register is redundant: replace the load with another store of the
// still-live source register (the constraint ties src/dst and the slots).
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.