//
// Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes
// (i.e. the x86 3-bit register field used in ModRM bytes).

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
// (In every reg_class_dynamic below, the first argument is the class chosen
// when the predicate is true, the second when it is false.)
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI).
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

// Convenience macro: lets emission code write "__ op(...)" to call through
// the local MacroAssembler named '_masm'.
#define __ _masm.
// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
//
// Writes the 128-bit value {lo, hi} at the 16-byte-aligned address at or
// below 'adr' and returns that aligned address. Callers must therefore pass
// a pointer with at least 128 bits of slack before it (see fp_signmask_pool
// below, which reserves an extra 128 bits for exactly this purpose).
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
//
// Number of bytes emitted immediately before a call instruction for
// pre-call state resets: an optional FLDCW (6 bytes, when the method runs
// in 24-bit FP mode) and an optional vzeroupper (3 bytes, when supported).
// Used below so ret_addr_offset/padding computations can skip past them.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All stub call sequence; recorded when it is
// first emitted (see the assert below), -1 until then.
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return SafepointMechanism::uses_thread_local_poll();
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModRM byte built from its three fields: mod (2 bits), reg (3 bits),
// r/m (3 bits). Also used to build SIB bytes (scale/index/base have the
// same 2-3-3 layout).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a base opcode with a condition code.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Debug-only sanity check: an embedded oop immediate must be a real oop
  // and must not be scavengable unless the VM allows scavenge roots in code.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)) && (ScavengeRootsInCode || !Universe::heap()->is_scavengable(cast_to_oop(d32))), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + ModRM + SIB + displacement addressing [ESP+disp],
// choosing the 8-bit displacement form when disp fits in a byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModRM (and optional SIB) byte(s) plus displacement for a
// [base + index*scale + displace] memory operand.
//   index == 0x4  means "no index" (0x4 is the SIB escape in the index field),
//   base  == -1   means absolute 32-bit address (disp32-only form).
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {            // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32     (cbuf, displace);
          }
        }
        else {            // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32     (cbuf, displace);
          }
        }
      }
    }
  }
  else {    // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {            // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32     (cbuf, displace);
        }
      }
    }
  }
}


// Integer register-to-register copy via MOV (opcode 0x8B);
// emits nothing at all for a self-copy.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Patch up EFLAGS after a COMISS/UCOMISS so that an unordered (NaN)
// comparison reads as 'less than' (see flag-bit diagram below).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize the three-way result of a preceding FP compare into dst:
// -1 for 'below' or unordered (parity), 0 for equal, 1 for greater.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
// MachConstantBaseNode is a no-op on x86_32: the constant table is reached
// via absolute addressing, so no base register needs to be materialized.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the method prolog for -XX:+PrintAssembly style output;
// mirrors the code emitted by MachPrologNode::emit below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  // verified_entry() emits the whole prolog: stack bang, frame push,
  // optional 24-bit FPU control word load, etc.
  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the method epilog; mirrors MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  // ADD ESP, #framesize: 0x81 (imm32 form) when the frame needs a 32-bit
  // immediate, 0x83 (imm8 form) otherwise, nothing for an empty frame.
  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    if (SafepointMechanism::uses_thread_local_poll()) {
      // Thread-local poll: load the polling page address out of the current
      // thread and test against it.
      Register pollReg = as_Register(EBX_enc);
      MacroAssembler masm(&cbuf);
      masm.get_thread(pollReg);
      masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
      masm.relocate(relocInfo::poll_return_type);
      masm.testl(rax, Address(pollReg, 0));
    } else {
      // Global poll: TEST EAX against the absolute polling page address.
      cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
      emit_opcode(cbuf,0x85);
      emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
      emit_d32(cbuf, (intptr_t)os::get_polling_page());
    }
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Coarse register classes used by the spill-copy helpers below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
// Classify an OptoReg as invalid, stack slot, integer register,
// x87 float register (pre-SSE2 only), or XMM register.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (or just format / size) a load or store between a register and
// [ESP + offset]. Returns the running size estimate in bytes; when cbuf is
// NULL and do_size is false, only prints the disassembly to st.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else {                      // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
// Spill/fill between an XMM register and a stack slot [ESP+offset].
// A register pair (reg_lo+1 == reg_hi) is moved as a 64-bit double,
// otherwise as a 32-bit float.  Returns the updated running byte count.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting: EVEX may compress the displacement to one byte even
  // when it exceeds 127, so query the assembler in that case.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (single or double precision pair).
// Returns the updated running byte count.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Copy a 32-bit general-purpose register into an XMM register (MOVD).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX > 2) ? 6 : 4;
}


// Copy an XMM register into a 32-bit general-purpose register (MOVD).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX > 2) ? 6 : 4;
}

// Integer register-to-register copy (MOV r32,r32); always 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 register to a stack slot.  If the source is not already at
// the top of the FP stack it is FLD'd first (then stored with a pop).
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // 'st_op' is only a register-class carrier for impl_helper's print path;
  // the real opcode selection is 'op' below.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);

// Copy a vector value between two stack slots.  VecS/VecD go via PUSH/POP;
// VecX/VecY/VecZ bounce through xmm0, which is saved/restored below ESP.
// The pre-computed 'calc_size' is asserted against the bytes actually
// emitted, so the size arithmetic here must track the encodings exactly.
static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
                                     int dst_offset, uint ireg, outputStream* st) {
  int calc_size = 0;
  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
  switch (ireg) {
  case Op_VecS:
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    break;
  case Op_VecD: {
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    // Second 32-bit half lives 4 bytes higher; its displacement may need a
    // different encoding width than the first half's.
    int tmp_src_offset = src_offset + 4;
    int tmp_dst_offset = dst_offset + 4;
    src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
    dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
    calc_size += 3+src_offset_size + 3+dst_offset_size;
    break;
  }
  case Op_VecX:
  case Op_VecY:
  case Op_VecZ:
    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
    break;
  default:
    ShouldNotReachHere();
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX:
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
    assert(size == calc_size, "incorrect size calculation");
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  return calc_size;
}

// Workhorse for register-allocator spill copies.  Dispatches on the
// (source class, destination class) pair computed by rc_class() and either
// emits code (cbuf != NULL), prints assembly (cbuf == NULL, !do_size), or
// just accumulates the encoding size.  Returns the total size in bytes.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high half first so the low-half PUSH does not clobber it.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                         st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Materialize the address of the lock's stack slot: LEA reg,[ESP+offset].
// Uses the disp8 form when the offset fits in a byte, disp32 otherwise;
// size() below must agree with this choice.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7; // LEA with disp32
  }
  else {
    return 4; // LEA with disp8
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: inline-cache check, then NOP padding so the
// verified entry point can be atomically patched.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
     nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}

// This is UltraSparc specific, true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On 86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 1; }

// No CMOVF/CMOVD with SSE/SSE2
const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis(); // 32-bit VM has no compressed oops
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis(); // 32-bit VM has no compressed klass pointers
  return true;
}

bool Matcher::const_oop_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_klass_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Replace the memory operand of 'node' (the operand feeding input 'idx')
// with its *_win95_safe variant, which avoids EBP as an address register
// (per the LOAD_LONG comment below).  Operand forms that already cannot
// use EBP are left untouched.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt     = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  while( idx >= skipped+num_edges ) { // Check for index past first operand
    skipped += num_edges;
    opcnt++;                          // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere(); // no 64-bit divmod node on 32-bit x86
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere(); // no 64-bit divmod node on 32-bit x86
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
// MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // Emit a ModR/M byte for a register-register form
  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Emit an explicit opcode byte followed by a register-register ModR/M
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32,0 without affecting flags (unlike XOR)
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 );                // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                         special case
    //
    // input : rax,: dividend                      min_int
    //         reg:  divisor                       -1
    //
    // output: rax,: quotient  (= rax, idiv reg)   min_int
    //         rdx:  remainder (= rax, irem reg)   0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1720 // Check for 8-bit immediate, and set sign extend bit in opcode 1721 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1722 emit_opcode(cbuf, $primary | 0x02); } 1723 else { // If 32-bit immediate 1724 emit_opcode(cbuf, $primary); 1725 } 1726 // Emit r/m byte with secondary opcode, after primary opcode. 1727 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1728 %} 1729 1730 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1731 // Check for 8-bit immediate, and set sign extend bit in opcode 1732 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1733 $$$emit8$imm$$constant; 1734 } 1735 else { // If 32-bit immediate 1736 // Output immediate 1737 $$$emit32$imm$$constant; 1738 } 1739 %} 1740 1741 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1742 // Emit primary opcode and set sign-extend bit 1743 // Check for 8-bit immediate, and set sign extend bit in opcode 1744 int con = (int)$imm$$constant; // Throw away top bits 1745 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1746 // Emit r/m byte with secondary opcode, after primary opcode. 1747 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1748 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1749 else emit_d32(cbuf,con); 1750 %} 1751 1752 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1753 // Emit primary opcode and set sign-extend bit 1754 // Check for 8-bit immediate, and set sign extend bit in opcode 1755 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1756 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1757 // Emit r/m byte with tertiary opcode, after primary opcode. 
1758 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1759 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1760 else emit_d32(cbuf,con); 1761 %} 1762 1763 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1764 emit_cc(cbuf, $secondary, $dst$$reg ); 1765 %} 1766 1767 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1768 int destlo = $dst$$reg; 1769 int desthi = HIGH_FROM_LOW(destlo); 1770 // bswap lo 1771 emit_opcode(cbuf, 0x0F); 1772 emit_cc(cbuf, 0xC8, destlo); 1773 // bswap hi 1774 emit_opcode(cbuf, 0x0F); 1775 emit_cc(cbuf, 0xC8, desthi); 1776 // xchg lo and hi 1777 emit_opcode(cbuf, 0x87); 1778 emit_rm(cbuf, 0x3, destlo, desthi); 1779 %} 1780 1781 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1782 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1783 %} 1784 1785 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1786 $$$emit8$primary; 1787 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1788 %} 1789 1790 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1791 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1792 emit_d8(cbuf, op >> 8 ); 1793 emit_d8(cbuf, op & 255); 1794 %} 1795 1796 // emulate a CMOV with a conditional branch around a MOV 1797 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1798 // Invert sense of branch from sense of CMOV 1799 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1800 emit_d8( cbuf, $brOffs$$constant ); 1801 %} 1802 1803 enc_class enc_PartialSubtypeCheck( ) %{ 1804 Register Redi = as_Register(EDI_enc); // result register 1805 Register Reax = as_Register(EAX_enc); // super class 1806 Register Recx = as_Register(ECX_enc); // killed 1807 Register Resi = as_Register(ESI_enc); // sub class 1808 Label miss; 1809 1810 MacroAssembler _masm(&cbuf); 1811 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1812 NULL, &miss, 1813 /*set_cond_codes:*/ true); 1814 if ($primary) { 1815 __ xorptr(Redi, Redi); 1816 } 1817 __ bind(miss); 1818 %} 1819 1820 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1821 
MacroAssembler masm(&cbuf); 1822 int start = masm.offset(); 1823 if (UseSSE >= 2) { 1824 if (VerifyFPU) { 1825 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1826 } 1827 } else { 1828 // External c_calling_convention expects the FPU stack to be 'clean'. 1829 // Compiled code leaves it dirty. Do cleanup now. 1830 masm.empty_FPU_stack(); 1831 } 1832 if (sizeof_FFree_Float_Stack_All == -1) { 1833 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1834 } else { 1835 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1836 } 1837 %} 1838 1839 enc_class Verify_FPU_For_Leaf %{ 1840 if( VerifyFPU ) { 1841 MacroAssembler masm(&cbuf); 1842 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1843 } 1844 %} 1845 1846 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1847 // This is the instruction starting address for relocation info. 1848 cbuf.set_insts_mark(); 1849 $$$emit8$primary; 1850 // CALL directly to the runtime 1851 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1852 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1853 1854 if (UseSSE >= 2) { 1855 MacroAssembler _masm(&cbuf); 1856 BasicType rt = tf()->return_type(); 1857 1858 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1859 // A C runtime call where the return value is unused. In SSE2+ 1860 // mode the result needs to be removed from the FPU stack. It's 1861 // likely that this function call could be removed by the 1862 // optimizer if the C function is a pure function. 
1863 __ ffree(0); 1864 } else if (rt == T_FLOAT) { 1865 __ lea(rsp, Address(rsp, -4)); 1866 __ fstp_s(Address(rsp, 0)); 1867 __ movflt(xmm0, Address(rsp, 0)); 1868 __ lea(rsp, Address(rsp, 4)); 1869 } else if (rt == T_DOUBLE) { 1870 __ lea(rsp, Address(rsp, -8)); 1871 __ fstp_d(Address(rsp, 0)); 1872 __ movdbl(xmm0, Address(rsp, 0)); 1873 __ lea(rsp, Address(rsp, 8)); 1874 } 1875 } 1876 %} 1877 1878 enc_class pre_call_resets %{ 1879 // If method sets FPU control word restore it here 1880 debug_only(int off0 = cbuf.insts_size()); 1881 if (ra_->C->in_24_bit_fp_mode()) { 1882 MacroAssembler _masm(&cbuf); 1883 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1884 } 1885 // Clear upper bits of YMM registers when current compiled code uses 1886 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1887 MacroAssembler _masm(&cbuf); 1888 __ vzeroupper(); 1889 debug_only(int off1 = cbuf.insts_size()); 1890 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1891 %} 1892 1893 enc_class post_call_FPU %{ 1894 // If method sets FPU control word do it here also 1895 if (Compile::current()->in_24_bit_fp_mode()) { 1896 MacroAssembler masm(&cbuf); 1897 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1898 } 1899 %} 1900 1901 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1902 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1903 // who we intended to call. 1904 cbuf.set_insts_mark(); 1905 $$$emit8$primary; 1906 1907 if (!_method) { 1908 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1909 runtime_call_Relocation::spec(), 1910 RELOC_IMM32); 1911 } else { 1912 int method_index = resolved_method_index(cbuf); 1913 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1914 : static_call_Relocation::spec(method_index); 1915 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1916 rspec, RELOC_DISP32); 1917 // Emit stubs for static call. 1918 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1919 if (stub == NULL) { 1920 ciEnv::current()->record_failure("CodeCache is full"); 1921 return; 1922 } 1923 } 1924 %} 1925 1926 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1927 MacroAssembler _masm(&cbuf); 1928 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1929 %} 1930 1931 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1932 int disp = in_bytes(Method::from_compiled_offset()); 1933 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1934 1935 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1936 cbuf.set_insts_mark(); 1937 $$$emit8$primary; 1938 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1939 emit_d8(cbuf, disp); // Displacement 1940 1941 %} 1942 1943 // Following encoding is no longer used, but may be restored if calling 1944 // convention changes significantly. 
1945 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1946 // 1947 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1948 // // int ic_reg = Matcher::inline_cache_reg(); 1949 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1950 // // int imo_reg = Matcher::interpreter_method_oop_reg(); 1951 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1952 // 1953 // // // Interpreter expects method_oop in EBX, currently a callee-saved register, 1954 // // // so we load it immediately before the call 1955 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop 1956 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1957 // 1958 // // xor rbp,ebp 1959 // emit_opcode(cbuf, 0x33); 1960 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1961 // 1962 // // CALL to interpreter. 1963 // cbuf.set_insts_mark(); 1964 // $$$emit8$primary; 1965 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1966 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1967 // %} 1968 1969 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1970 $$$emit8$primary; 1971 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1972 $$$emit8$shift$$constant; 1973 %} 1974 1975 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1976 // Load immediate does not have a zero or sign extended version 1977 // for 8-bit immediates 1978 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1979 $$$emit32$src$$constant; 1980 %} 1981 1982 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1983 // Load immediate does not have a zero or sign extended version 1984 // for 8-bit immediates 1985 emit_opcode(cbuf, $primary + $dst$$reg); 1986 $$$emit32$src$$constant; 1987 %} 1988 1989 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1990 // Load immediate does not have a zero or sign extended version 1991 // for 8-bit immediates 1992 int dst_enc = $dst$$reg; 1993 int src_con = $src$$constant & 0x0FFFFFFFFL; 1994 if (src_con == 0) { 1995 // xor dst, dst 
1996 emit_opcode(cbuf, 0x33); 1997 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1998 } else { 1999 emit_opcode(cbuf, $primary + dst_enc); 2000 emit_d32(cbuf, src_con); 2001 } 2002 %} 2003 2004 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 2005 // Load immediate does not have a zero or sign extended version 2006 // for 8-bit immediates 2007 int dst_enc = $dst$$reg + 2; 2008 int src_con = ((julong)($src$$constant)) >> 32; 2009 if (src_con == 0) { 2010 // xor dst, dst 2011 emit_opcode(cbuf, 0x33); 2012 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 2013 } else { 2014 emit_opcode(cbuf, $primary + dst_enc); 2015 emit_d32(cbuf, src_con); 2016 } 2017 %} 2018 2019 2020 // Encode a reg-reg copy. If it is useless, then empty encoding. 2021 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 2022 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2023 %} 2024 2025 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 2026 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2027 %} 2028 2029 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 2030 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2031 %} 2032 2033 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 2034 $$$emit8$primary; 2035 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2036 %} 2037 2038 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 2039 $$$emit8$secondary; 2040 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2041 %} 2042 2043 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 2044 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2045 %} 2046 2047 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 2048 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2049 %} 2050 2051 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 2052 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); 2053 %} 2054 2055 enc_class Con32 (immI src) %{ // Con32(storeImmI) 2056 // Output immediate 2057 $$$emit32$src$$constant; 2058 %} 2059 2060 enc_class Con32FPR_as_bits(immFPR src) 
%{ // storeF_imm 2061 // Output Float immediate bits 2062 jfloat jf = $src$$constant; 2063 int jf_as_bits = jint_cast( jf ); 2064 emit_d32(cbuf, jf_as_bits); 2065 %} 2066 2067 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 2068 // Output Float immediate bits 2069 jfloat jf = $src$$constant; 2070 int jf_as_bits = jint_cast( jf ); 2071 emit_d32(cbuf, jf_as_bits); 2072 %} 2073 2074 enc_class Con16 (immI src) %{ // Con16(storeImmI) 2075 // Output immediate 2076 $$$emit16$src$$constant; 2077 %} 2078 2079 enc_class Con_d32(immI src) %{ 2080 emit_d32(cbuf,$src$$constant); 2081 %} 2082 2083 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2084 // Output immediate memory reference 2085 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2086 emit_d32(cbuf, 0x00); 2087 %} 2088 2089 enc_class lock_prefix( ) %{ 2090 emit_opcode(cbuf,0xF0); // [Lock] 2091 %} 2092 2093 // Cmp-xchg long value. 2094 // Note: we need to swap rbx, and rcx before and after the 2095 // cmpxchg8 instruction because the instruction uses 2096 // rcx as the high order word of the new value to store but 2097 // our register encoding uses rbx,. 
  // 8-byte compare-and-exchange; see the note above about the rbx/rcx swap.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // 32-bit locked compare-and-exchange at [mem_ptr]
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // 8-bit locked compare-and-exchange at [mem_ptr]
  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB0);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // 16-bit locked compare-and-exchange at [mem_ptr] (0x66 size prefix)
  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // 16-bit mode
    emit_opcode(cbuf, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize ZF!=0 as a 0/1 boolean: MOV res,0 (5 bytes); JNE skips the
  // 5-byte MOV res,1 that follows.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Long shift by 1..31: SHLD/SHRD ($tertiary) moves bits between the
  // halves, then the single-half shift ($primary/$secondary) finishes.
  // The register order depends on shift direction ($tertiary == 0xA4 is the
  // left-shift/SHLD form).
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by 32..63: move hi into lo, shift lo by
  // (cnt-32) if needed, then SAR hi by 31 to propagate the sign.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical long shift by 32..63: move one half into the other, shift by
  // (cnt-32) if needed, then clear the vacated half with XOR.  Direction is
  // selected by $secondary (0x5 is the right-shift form).
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  // Same as RMopc_Mem_no_oop but the displacement may carry relocation info.
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();    // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // LEA dst, [src0 + src1] — base register plus constant displacement
  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base  = $src0$$reg;         // 0xFFFFFFFF indicates no base
    int index = 0x04;               // 0x04 indicates no index
    int scale = 0x00;               // 0x00 indicates no scale
    int displace = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // dst = min(dst, src): CMP then JL (0x7C) around a 2-byte MOV
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst, src): CMP then JG (0x7F) around a 2-byte MOV
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();    // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;           // Store & pop
      emit_opcode( cbuf, 0xD9 );    // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();          // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free conditional add: p += (p < q) ? y : 0, using SBB to turn
  // the borrow from SUB into an all-ones/all-zeros mask in tmp.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Variable long left shift (shift count in ECX); handles counts >= 32 by
  // moving lo into hi and clearing lo before the SHLD/SHL pair.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Variable logical long right shift (shift count in ECX)
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Variable arithmetic long right shift (shift count in ECX); sign-fills
  // the high half with SAR 31 on the count >= 32 path.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Multiply by the subnormal bias-1 constant (strictfp scaling)
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Multiply by the subnormal bias-2 constant (strictfp scaling)
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;                      // FST (no pop) when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: pop the copy we just pushed
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;                      // FST (no pop) when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: pop the copy we just pushed
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  // Load dst into FPR0, rotating src up to FPR1 via fincstp/FXCH/fdecstp
  // when src is not already there.
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Move two XMM doubles onto the x87 stack (src1 below src0) via a stack temp
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Move two XMM floats onto the x87 stack (src1 below src0) via a stack temp
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 TOS double into an XMM register and release the stack temp
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Pop the x87 TOS float into an XMM register, releasing d8 bytes of temp
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Push an XMM double onto the x87 stack via a stack temp
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
2590 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2591 __ fld_d(Address(rsp, 0)); 2592 %} 2593 2594 enc_class push_stack_temp_qword() %{ 2595 MacroAssembler _masm(&cbuf); 2596 __ subptr(rsp, 8); 2597 %} 2598 2599 enc_class pop_stack_temp_qword() %{ 2600 MacroAssembler _masm(&cbuf); 2601 __ addptr(rsp, 8); 2602 %} 2603 2604 enc_class push_xmm_to_fpr1(regD src) %{ 2605 MacroAssembler _masm(&cbuf); 2606 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2607 __ fld_d(Address(rsp, 0)); 2608 %} 2609 2610 enc_class Push_Result_Mod_DPR( regDPR src) %{ 2611 if ($src$$reg != FPR1L_enc) { 2612 // fincstp 2613 emit_opcode (cbuf, 0xD9); 2614 emit_opcode (cbuf, 0xF7); 2615 // FXCH FPR1 with src 2616 emit_opcode(cbuf, 0xD9); 2617 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2618 // fdecstp 2619 emit_opcode (cbuf, 0xD9); 2620 emit_opcode (cbuf, 0xF6); 2621 } 2622 // // following asm replaced with Pop_Reg_F or Pop_Mem_F 2623 // // FSTP FPR$dst$$reg 2624 // emit_opcode( cbuf, 0xDD ); 2625 // emit_d8( cbuf, 0xD8+$dst$$reg ); 2626 %} 2627 2628 enc_class fnstsw_sahf_skip_parity() %{ 2629 // fnstsw ax 2630 emit_opcode( cbuf, 0xDF ); 2631 emit_opcode( cbuf, 0xE0 ); 2632 // sahf 2633 emit_opcode( cbuf, 0x9E ); 2634 // jnp ::skip 2635 emit_opcode( cbuf, 0x7B ); 2636 emit_opcode( cbuf, 0x05 ); 2637 %} 2638 2639 enc_class emitModDPR() %{ 2640 // fprem must be iterative 2641 // :: loop 2642 // fprem 2643 emit_opcode( cbuf, 0xD9 ); 2644 emit_opcode( cbuf, 0xF8 ); 2645 // wait 2646 emit_opcode( cbuf, 0x9b ); 2647 // fnstsw ax 2648 emit_opcode( cbuf, 0xDF ); 2649 emit_opcode( cbuf, 0xE0 ); 2650 // sahf 2651 emit_opcode( cbuf, 0x9E ); 2652 // jp ::loop 2653 emit_opcode( cbuf, 0x0F ); 2654 emit_opcode( cbuf, 0x8A ); 2655 emit_opcode( cbuf, 0xF4 ); 2656 emit_opcode( cbuf, 0xFF ); 2657 emit_opcode( cbuf, 0xFF ); 2658 emit_opcode( cbuf, 0xFF ); 2659 %} 2660 2661 enc_class fpu_flags() %{ 2662 // fnstsw_ax 2663 emit_opcode( cbuf, 0xDF); 2664 emit_opcode( cbuf, 0xE0); 2665 // test ax,0x0400 2666 emit_opcode( cbuf, 
0x66 ); // operand-size prefix for 16-bit immediate 2667 emit_opcode( cbuf, 0xA9 ); 2668 emit_d16 ( cbuf, 0x0400 ); 2669 // // // This sequence works, but stalls for 12-16 cycles on PPro 2670 // // test rax,0x0400 2671 // emit_opcode( cbuf, 0xA9 ); 2672 // emit_d32 ( cbuf, 0x00000400 ); 2673 // 2674 // jz exit (no unordered comparison) 2675 emit_opcode( cbuf, 0x74 ); 2676 emit_d8 ( cbuf, 0x02 ); 2677 // mov ah,1 - treat as LT case (set carry flag) 2678 emit_opcode( cbuf, 0xB4 ); 2679 emit_d8 ( cbuf, 0x01 ); 2680 // sahf 2681 emit_opcode( cbuf, 0x9E); 2682 %} 2683 2684 enc_class cmpF_P6_fixup() %{ 2685 // Fixup the integer flags in case comparison involved a NaN 2686 // 2687 // JNP exit (no unordered comparison, P-flag is set by NaN) 2688 emit_opcode( cbuf, 0x7B ); 2689 emit_d8 ( cbuf, 0x03 ); 2690 // MOV AH,1 - treat as LT case (set carry flag) 2691 emit_opcode( cbuf, 0xB4 ); 2692 emit_d8 ( cbuf, 0x01 ); 2693 // SAHF 2694 emit_opcode( cbuf, 0x9E); 2695 // NOP // target for branch to avoid branch to branch 2696 emit_opcode( cbuf, 0x90); 2697 %} 2698 2699 // fnstsw_ax(); 2700 // sahf(); 2701 // movl(dst, nan_result); 2702 // jcc(Assembler::parity, exit); 2703 // movl(dst, less_result); 2704 // jcc(Assembler::below, exit); 2705 // movl(dst, equal_result); 2706 // jcc(Assembler::equal, exit); 2707 // movl(dst, greater_result); 2708 2709 // less_result = 1; 2710 // greater_result = -1; 2711 // equal_result = 0; 2712 // nan_result = -1; 2713 2714 enc_class CmpF_Result(rRegI dst) %{ 2715 // fnstsw_ax(); 2716 emit_opcode( cbuf, 0xDF); 2717 emit_opcode( cbuf, 0xE0); 2718 // sahf 2719 emit_opcode( cbuf, 0x9E); 2720 // movl(dst, nan_result); 2721 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2722 emit_d32( cbuf, -1 ); 2723 // jcc(Assembler::parity, exit); 2724 emit_opcode( cbuf, 0x7A ); 2725 emit_d8 ( cbuf, 0x13 ); 2726 // movl(dst, less_result); 2727 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2728 emit_d32( cbuf, -1 ); 2729 // jcc(Assembler::below, exit); 2730 emit_opcode( cbuf, 0x72 ); 
    emit_d8  ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8  ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend an int into a long register pair: copy to both halves,
  // then arithmetic-shift the high half right by 31.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  // Push a long register pair onto the CPU stack, FILD it onto the FPU
  // stack, then release the 8 bytes again.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  // EDX:EAX = EAX * src1, then shift the high word right by (cnt - 32)
  // when a shift is actually needed.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL   EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR    EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL    EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}

  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL    EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}

  // Push both long operands (4 words) and call SharedRuntime::ldiv;
  // the 16 bytes of arguments are popped after the call returns.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Same argument-push/call/pop sequence as long_div, but calling
  // SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Zero-test of a long: tmp = src.lo | src.hi sets ZF for the pair.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  // Equality-style long compare: compare low halves, and only compare
  // the high halves when the low halves were equal.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}

  // Signed long compare via CMP lo / SBB hi; tmp is clobbered.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB    $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}

  // Compare a long against zero: compute flags for (0 - src).
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}

  // Sniff, sniff...
  // smells like Gnu Superoptimizer
  // Negate a long in place: NEG hi; NEG lo; SBB hi,0 then fixes the
  // borrow out of the low word.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  // POP EDX.
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}

  // Tail-jump to the rethrow stub.
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack.  If the result is the
    // sentinel 0x80000000 (what FISTP stores for out-of-range/NaN input)
    // redo the conversion in the d2i_wrapper slow path.
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Double -> long: same round-to-zero trick as DPR2I_encoding, but with
  // an 8-byte slot, a 64-bit FISTP, and a slow-path call to d2l_wrapper
  // when the stored result is the sentinel 0x80000000_00000000.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x5A);       // POP EDX
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);     // Size of slow_call
    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */   (non-popping; D8 C0+i encodes FADD,
    // not FADDP -- the popping form is DE C0+i, see FAddP_reg_ST below)
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL   ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  // A single 64-bit FILD from memory, then FISTP into the destination
  // stack slot -- the memory read itself is one atomic 64-bit access.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable. Unfortunately, it kills the condition code
  // in the process
  // We current use TESTL [spp],EDI
  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0

  enc_class Safepoint_Poll() %{
    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm (cbuf, 0x0, 0x7, 0x5);
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  %}
%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
3169 // 3170 // S T A C K L A Y O U T Allocators stack-slot number 3171 // | (to get allocators register number 3172 // G Owned by | | v add OptoReg::stack0()) 3173 // r CALLER | | 3174 // o | +--------+ pad to even-align allocators stack-slot 3175 // w V | pad0 | numbers; owned by CALLER 3176 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned 3177 // h ^ | in | 5 3178 // | | args | 4 Holes in incoming args owned by SELF 3179 // | | | | 3 3180 // | | +--------+ 3181 // V | | old out| Empty on Intel, window on Sparc 3182 // | old |preserve| Must be even aligned. 3183 // | SP-+--------+----> Matcher::_old_SP, even aligned 3184 // | | in | 3 area for Intel ret address 3185 // Owned by |preserve| Empty on Sparc. 3186 // SELF +--------+ 3187 // | | pad2 | 2 pad to align old SP 3188 // | +--------+ 1 3189 // | | locks | 0 3190 // | +--------+----> OptoReg::stack0(), even aligned 3191 // | | pad1 | 11 pad to align new SP 3192 // | +--------+ 3193 // | | | 10 3194 // | | spills | 9 spills 3195 // V | | 8 (pad0 slot for callee) 3196 // -----------+--------+----> Matcher::_out_arg_limit, unaligned 3197 // ^ | out | 7 3198 // | | args | 6 Holes in outgoing args owned by CALLEE 3199 // Owned by +--------+ 3200 // CALLEE | new out| 6 Empty on Intel, window on Sparc 3201 // | new |preserve| Must be even-aligned. 3202 // | SP-+--------+----> Matcher::_new_SP, even aligned 3203 // | | | 3204 // 3205 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is 3206 // known from SELF's arguments and the Java calling convention. 3207 // Region 6-7 is determined per call site. 3208 // Note 2: If the calling convention leaves holes in the incoming argument 3209 // area, those holes are owned by SELF. Holes in the outgoing area 3210 // are owned by the CALLEE. Holes should not be nessecary in the 3211 // incoming area, as the Java calling convention is completely under 3212 // the control of the AD file. 
//            Doubles can be sorted and packed to
//            avoid holes.  Holes in the outgoing arguments may be necessary for
//            varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // Note the asymmetry with c_return_value: Java float results use XMM0
  // already at UseSSE>=1, while C float results stay on the FPU stack
  // unless UseSSE>=2.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);                          // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);                      // Required cost attribute
ins_attrib ins_size(8);                        // Required
                                               // size attribute (in bits)
ins_attrib ins_short_branch(0);                // Required flag: is this instruction a
                                               // non-matching short branch variant of some
                                               // long branch?
ins_attrib ins_alignment(1);                   // Required alignment attribute (must be a power of 2)
                                               // specifies the alignment that some part of the instruction (not
                                               // necessarily the start) requires.  If > 1, a compute_padding()
                                               // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate that fits in a signed byte (allows imm8 encodings)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
operand immI16() %{ 3401 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); 3402 match(ConI); 3403 3404 op_cost(10); 3405 format %{ %} 3406 interface(CONST_INTER); 3407 %} 3408 3409 // Int Immediate non-negative 3410 operand immU31() 3411 %{ 3412 predicate(n->get_int() >= 0); 3413 match(ConI); 3414 3415 op_cost(0); 3416 format %{ %} 3417 interface(CONST_INTER); 3418 %} 3419 3420 // Constant for long shifts 3421 operand immI_32() %{ 3422 predicate( n->get_int() == 32 ); 3423 match(ConI); 3424 3425 op_cost(0); 3426 format %{ %} 3427 interface(CONST_INTER); 3428 %} 3429 3430 operand immI_1_31() %{ 3431 predicate( n->get_int() >= 1 && n->get_int() <= 31 ); 3432 match(ConI); 3433 3434 op_cost(0); 3435 format %{ %} 3436 interface(CONST_INTER); 3437 %} 3438 3439 operand immI_32_63() %{ 3440 predicate( n->get_int() >= 32 && n->get_int() <= 63 ); 3441 match(ConI); 3442 op_cost(0); 3443 3444 format %{ %} 3445 interface(CONST_INTER); 3446 %} 3447 3448 operand immI_1() %{ 3449 predicate( n->get_int() == 1 ); 3450 match(ConI); 3451 3452 op_cost(0); 3453 format %{ %} 3454 interface(CONST_INTER); 3455 %} 3456 3457 operand immI_2() %{ 3458 predicate( n->get_int() == 2 ); 3459 match(ConI); 3460 3461 op_cost(0); 3462 format %{ %} 3463 interface(CONST_INTER); 3464 %} 3465 3466 operand immI_3() %{ 3467 predicate( n->get_int() == 3 ); 3468 match(ConI); 3469 3470 op_cost(0); 3471 format %{ %} 3472 interface(CONST_INTER); 3473 %} 3474 3475 // Pointer Immediate 3476 operand immP() %{ 3477 match(ConP); 3478 3479 op_cost(10); 3480 format %{ %} 3481 interface(CONST_INTER); 3482 %} 3483 3484 // NULL Pointer Immediate 3485 operand immP0() %{ 3486 predicate( n->get_ptr() == 0 ); 3487 match(ConP); 3488 op_cost(0); 3489 3490 format %{ %} 3491 interface(CONST_INTER); 3492 %} 3493 3494 // Long Immediate 3495 operand immL() %{ 3496 match(ConL); 3497 3498 op_cost(20); 3499 format %{ %} 3500 interface(CONST_INTER); 3501 %} 3502 3503 // Long Immediate zero 3504 operand immL0() %{ 3505 
predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one (original comment incorrectly said "zero")
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value fits in a sign-extended 32-bit field
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero (x87 path, UseSSE<=1)
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 path)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 path)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand
immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  // jlong_cast(getd()) == 0 matches +0.0 only (bit pattern test), not -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero (x87 path, UseSSE == 0)
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 path)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE path)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.
// Zero and not -0.0 (bit-pattern test excludes the negative-zero encoding)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register (the byte-addressable x-registers)
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand
eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register from the nax_reg class (name suggests "no EAX";
// class definition is elsewhere in this file)
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register from the nadx_reg class (name suggests "no EAX/EDX")
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register from the ncx_reg class (name suggests "no ECX")
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

//
// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);  // discourage use unless the safe variant is required
  format %{ %}
  interface(REG_INTER);
%}

operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long register (a pair of 32-bit registers)
operand eRegL() %{
constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// predicate(false): never produced by operand matching; referenced
// explicitly by instructions that handle the unordered case via CF.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}

operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}

operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand
flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}

operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}

operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands (x87 stack registers, UseSSE < 2)
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

// Any x87 double register except FPR1
operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float
// register operands
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// Same class as vecS (legacy regs); kept as a separate user type
operand legVecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory
// Operands----------------------------------------------------
// Direct Memory Operand (absolute address)
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);  // no base register
    index(0x4);        // 0x4 encodes "no index" (ESP cannot be an index)
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// (pointer constant as displacement, integer register as base)
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" with out a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.
// Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


// Legacy-encodable SSE float register
operand legRegF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Legacy-encodable SSE double register
operand legRegD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}



//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// Indirect Memory Operand (EBP excluded as base; see eRegP_no_EBP)
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}
//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed; encodings are x86 condition-code nibbles)
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
// (only eq/ne need the fixup)
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o");     // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported
by the instruction 4659 %} 4660 %} 4661 4662 // Comparison Code used in long compares 4663 operand cmpOp_commute() %{ 4664 match(Bool); 4665 4666 format %{ "" %} 4667 interface(COND_INTER) %{ 4668 equal(0x4, "e"); 4669 not_equal(0x5, "ne"); 4670 less(0xF, "g"); 4671 greater_equal(0xE, "le"); 4672 less_equal(0xD, "ge"); 4673 greater(0xC, "l"); 4674 overflow(0x0, "o"); 4675 no_overflow(0x1, "no"); 4676 %} 4677 %} 4678 4679 // Comparison Code used in unsigned long compares 4680 operand cmpOpU_commute() %{ 4681 match(Bool); 4682 4683 format %{ "" %} 4684 interface(COND_INTER) %{ 4685 equal(0x4, "e"); 4686 not_equal(0x5, "ne"); 4687 less(0x7, "nbe"); 4688 greater_equal(0x6, "be"); 4689 less_equal(0x3, "nb"); 4690 greater(0x2, "b"); 4691 overflow(0x0, "o"); 4692 no_overflow(0x1, "no"); 4693 %} 4694 %} 4695 4696 //----------OPERAND CLASSES---------------------------------------------------- 4697 // Operand Classes are groups of operands that are used as to simplify 4698 // instruction definitions by not requiring the AD writer to specify separate 4699 // instructions for every form of operand when the instruction accepts 4700 // multiple operand types with the same basic encoding and format. The classic 4701 // case of this is memory operands. 4702 4703 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, 4704 indIndex, indIndexScale, indIndexScaleOffset); 4705 4706 // Long memory operations are encoded in 2 instructions and a +4 offset. 4707 // This means some kind of offset is always required and you cannot use 4708 // an oop as the offset (done when working on static globals). 4709 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, 4710 indIndex, indIndexScale, indIndexScaleOffset); 4711 4712 4713 //----------PIPELINE----------------------------------------------------------- 4714 // Rules which define the behavior of the target architectures pipeline. 
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are of variable size
                                     // (original comment said "fixed size")
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation
// NOTE: src is declared as memory in this signature
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory
mem) %{ 4833 single_instruction; 4834 dst : S5(write); 4835 mem : S3(read); 4836 D0 : S0; // big decoder only 4837 ALU : S4; // any alu 4838 MEM : S3; // any mem 4839 %} 4840 4841 // Long ALU reg-mem operation 4842 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4843 instruction_count(2); 4844 dst : S5(write); 4845 mem : S3(read); 4846 D0 : S0(2); // big decoder only; twice 4847 ALU : S4(2); // any 2 alus 4848 MEM : S3(2); // both mems 4849 %} 4850 4851 // Integer mem operation (prefetch) 4852 pipe_class ialu_mem(memory mem) 4853 %{ 4854 single_instruction; 4855 mem : S3(read); 4856 D0 : S0; // big decoder only 4857 MEM : S3; // any mem 4858 %} 4859 4860 // Integer Store to Memory 4861 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4862 single_instruction; 4863 mem : S3(read); 4864 src : S5(read); 4865 D0 : S0; // big decoder only 4866 ALU : S4; // any alu 4867 MEM : S3; 4868 %} 4869 4870 // Long Store to Memory 4871 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4872 instruction_count(2); 4873 mem : S3(read); 4874 src : S5(read); 4875 D0 : S0(2); // big decoder only; twice 4876 ALU : S4(2); // any 2 alus 4877 MEM : S3(2); // Both mems 4878 %} 4879 4880 // Integer Store to Memory 4881 pipe_class ialu_mem_imm(memory mem) %{ 4882 single_instruction; 4883 mem : S3(read); 4884 D0 : S0; // big decoder only 4885 ALU : S4; // any alu 4886 MEM : S3; 4887 %} 4888 4889 // Integer ALU0 reg-reg operation 4890 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4891 single_instruction; 4892 dst : S4(write); 4893 src : S3(read); 4894 D0 : S0; // Big decoder only 4895 ALU0 : S3; // only alu0 4896 %} 4897 4898 // Integer ALU0 reg-mem operation 4899 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4900 single_instruction; 4901 dst : S5(write); 4902 mem : S3(read); 4903 D0 : S0; // big decoder only 4904 ALU0 : S4; // ALU0 only 4905 MEM : S3; // any mem 4906 %} 4907 4908 // Integer ALU reg-reg operation 4909 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4910 single_instruction; 4911 cr : S4(write); 4912 src1 : S3(read); 4913 src2 : S3(read); 4914 DECODE : S0; // any decoder 4915 ALU : S3; // any alu 4916 %} 4917 4918 // Integer ALU reg-imm operation 4919 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4920 single_instruction; 4921 cr : S4(write); 4922 src1 : S3(read); 4923 DECODE : S0; // any decoder 4924 ALU : S3; // any alu 4925 %} 4926 4927 // Integer ALU reg-mem operation 4928 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4929 single_instruction; 4930 cr : S4(write); 4931 src1 : S3(read); 4932 src2 : S3(read); 4933 D0 : S0; // big decoder only 4934 ALU : S4; // any alu 4935 MEM : S3; 4936 %} 4937 4938 // Conditional move reg-reg 4939 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4940 instruction_count(4); 4941 y : S4(read); 4942 q : S3(read); 4943 p : S3(read); 4944 DECODE : S0(4); // any decoder 4945 %} 4946 4947 // Conditional move reg-reg 4948 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4949 single_instruction; 4950 dst : S4(write); 4951 src : S3(read); 4952 cr : S3(read); 4953 DECODE : S0; // any decoder 4954 %} 4955 4956 // Conditional move reg-mem 4957 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4958 single_instruction; 4959 dst : S4(write); 4960 src : S3(read); 4961 cr : S3(read); 4962 DECODE : S0; // any decoder 4963 MEM : S3; 4964 %} 4965 4966 // Conditional move reg-reg long 4967 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4968 single_instruction; 4969 dst : S4(write); 4970 src : S3(read); 4971 cr : S3(read); 4972 DECODE : S0(2); // any 2 decoders 4973 %} 4974 4975 // Conditional move double reg-reg 4976 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4977 single_instruction; 4978 dst : S4(write); 4979 src : S3(read); 4980 cr : S3(read); 4981 DECODE : S0; // any decoder 4982 %} 4983 4984 // Float reg-reg operation 4985 pipe_class fpu_reg(regDPR 
dst) %{ 4986 instruction_count(2); 4987 dst : S3(read); 4988 DECODE : S0(2); // any 2 decoders 4989 FPU : S3; 4990 %} 4991 4992 // Float reg-reg operation 4993 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4994 instruction_count(2); 4995 dst : S4(write); 4996 src : S3(read); 4997 DECODE : S0(2); // any 2 decoders 4998 FPU : S3; 4999 %} 5000 5001 // Float reg-reg operation 5002 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 5003 instruction_count(3); 5004 dst : S4(write); 5005 src1 : S3(read); 5006 src2 : S3(read); 5007 DECODE : S0(3); // any 3 decoders 5008 FPU : S3(2); 5009 %} 5010 5011 // Float reg-reg operation 5012 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 5013 instruction_count(4); 5014 dst : S4(write); 5015 src1 : S3(read); 5016 src2 : S3(read); 5017 src3 : S3(read); 5018 DECODE : S0(4); // any 3 decoders 5019 FPU : S3(2); 5020 %} 5021 5022 // Float reg-reg operation 5023 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 5024 instruction_count(4); 5025 dst : S4(write); 5026 src1 : S3(read); 5027 src2 : S3(read); 5028 src3 : S3(read); 5029 DECODE : S1(3); // any 3 decoders 5030 D0 : S0; // Big decoder only 5031 FPU : S3(2); 5032 MEM : S3; 5033 %} 5034 5035 // Float reg-mem operation 5036 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 5037 instruction_count(2); 5038 dst : S5(write); 5039 mem : S3(read); 5040 D0 : S0; // big decoder only 5041 DECODE : S1; // any decoder for FPU POP 5042 FPU : S4; 5043 MEM : S3; // any mem 5044 %} 5045 5046 // Float reg-mem operation 5047 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 5048 instruction_count(3); 5049 dst : S5(write); 5050 src1 : S3(read); 5051 mem : S3(read); 5052 D0 : S0; // big decoder only 5053 DECODE : S1(2); // any decoder for FPU POP 5054 FPU : S4; 5055 MEM : S3; // any mem 5056 %} 5057 5058 // Float mem-reg operation 5059 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 5060 
instruction_count(2); 5061 src : S5(read); 5062 mem : S3(read); 5063 DECODE : S0; // any decoder for FPU PUSH 5064 D0 : S1; // big decoder only 5065 FPU : S4; 5066 MEM : S3; // any mem 5067 %} 5068 5069 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 5070 instruction_count(3); 5071 src1 : S3(read); 5072 src2 : S3(read); 5073 mem : S3(read); 5074 DECODE : S0(2); // any decoder for FPU PUSH 5075 D0 : S1; // big decoder only 5076 FPU : S4; 5077 MEM : S3; // any mem 5078 %} 5079 5080 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 5081 instruction_count(3); 5082 src1 : S3(read); 5083 src2 : S3(read); 5084 mem : S4(read); 5085 DECODE : S0; // any decoder for FPU PUSH 5086 D0 : S0(2); // big decoder only 5087 FPU : S4; 5088 MEM : S3(2); // any mem 5089 %} 5090 5091 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 5092 instruction_count(2); 5093 src1 : S3(read); 5094 dst : S4(read); 5095 D0 : S0(2); // big decoder only 5096 MEM : S3(2); // any mem 5097 %} 5098 5099 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 5100 instruction_count(3); 5101 src1 : S3(read); 5102 src2 : S3(read); 5103 dst : S4(read); 5104 D0 : S0(3); // big decoder only 5105 FPU : S4; 5106 MEM : S3(3); // any mem 5107 %} 5108 5109 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 5110 instruction_count(3); 5111 src1 : S4(read); 5112 mem : S4(read); 5113 DECODE : S0; // any decoder for FPU PUSH 5114 D0 : S0(2); // big decoder only 5115 FPU : S4; 5116 MEM : S3(2); // any mem 5117 %} 5118 5119 // Float load constant 5120 pipe_class fpu_reg_con(regDPR dst) %{ 5121 instruction_count(2); 5122 dst : S5(write); 5123 D0 : S0; // big decoder only for the load 5124 DECODE : S1; // any decoder for FPU POP 5125 FPU : S4; 5126 MEM : S3; // any mem 5127 %} 5128 5129 // Float load constant 5130 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5131 instruction_count(3); 5132 dst : S5(write); 5133 src : S3(read); 5134 D0 : S0; // big decoder only for 
the load 5135 DECODE : S1(2); // any decoder for FPU POP 5136 FPU : S4; 5137 MEM : S3; // any mem 5138 %} 5139 5140 // UnConditional branch 5141 pipe_class pipe_jmp( label labl ) %{ 5142 single_instruction; 5143 BR : S3; 5144 %} 5145 5146 // Conditional branch 5147 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5148 single_instruction; 5149 cr : S1(read); 5150 BR : S3; 5151 %} 5152 5153 // Allocation idiom 5154 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5155 instruction_count(1); force_serialization; 5156 fixed_latency(6); 5157 heap_ptr : S3(read); 5158 DECODE : S0(3); 5159 D0 : S2; 5160 MEM : S3; 5161 ALU : S3(2); 5162 dst : S5(write); 5163 BR : S5; 5164 %} 5165 5166 // Generic big/slow expanded idiom 5167 pipe_class pipe_slow( ) %{ 5168 instruction_count(10); multiple_bundles; force_serialization; 5169 fixed_latency(100); 5170 D0 : S0(2); 5171 MEM : S3(2); 5172 %} 5173 5174 // The real do-nothing guy 5175 pipe_class empty( ) %{ 5176 instruction_count(0); 5177 %} 5178 5179 // Define the class for the Nop node 5180 define %{ 5181 MachNop = empty; 5182 %} 5183 5184 %} 5185 5186 //----------INSTRUCTIONS------------------------------------------------------- 5187 // 5188 // match -- States which machine-independent subtree may be replaced 5189 // by this instruction. 5190 // ins_cost -- The estimated cost of this instruction is used by instruction 5191 // selection to identify a minimum cost tree of machine 5192 // instructions that matches a tree of machine-independent 5193 // instructions. 5194 // format -- A string providing the disassembly for this instruction. 5195 // The value of an instruction's operand may be inserted 5196 // by referring to it with a '$' prefix. 5197 // opcode -- Three instruction opcodes may be provided. These are referred 5198 // to within an encode class as $primary, $secondary, and $tertiary 5199 // respectively. 
The primary opcode is commonly used to 5200 // indicate the type of machine instruction, while secondary 5201 // and tertiary are often used for prefix options or addressing 5202 // modes. 5203 // ins_encode -- A list of encode classes with parameters. The encode class 5204 // name must have been defined in an 'enc_class' specification 5205 // in the encode section of the architecture description. 5206 5207 //----------BSWAP-Instruction-------------------------------------------------- 5208 instruct bytes_reverse_int(rRegI dst) %{ 5209 match(Set dst (ReverseBytesI dst)); 5210 5211 format %{ "BSWAP $dst" %} 5212 opcode(0x0F, 0xC8); 5213 ins_encode( OpcP, OpcSReg(dst) ); 5214 ins_pipe( ialu_reg ); 5215 %} 5216 5217 instruct bytes_reverse_long(eRegL dst) %{ 5218 match(Set dst (ReverseBytesL dst)); 5219 5220 format %{ "BSWAP $dst.lo\n\t" 5221 "BSWAP $dst.hi\n\t" 5222 "XCHG $dst.lo $dst.hi" %} 5223 5224 ins_cost(125); 5225 ins_encode( bswap_long_bytes(dst) ); 5226 ins_pipe( ialu_reg_reg); 5227 %} 5228 5229 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5230 match(Set dst (ReverseBytesUS dst)); 5231 effect(KILL cr); 5232 5233 format %{ "BSWAP $dst\n\t" 5234 "SHR $dst,16\n\t" %} 5235 ins_encode %{ 5236 __ bswapl($dst$$Register); 5237 __ shrl($dst$$Register, 16); 5238 %} 5239 ins_pipe( ialu_reg ); 5240 %} 5241 5242 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5243 match(Set dst (ReverseBytesS dst)); 5244 effect(KILL cr); 5245 5246 format %{ "BSWAP $dst\n\t" 5247 "SAR $dst,16\n\t" %} 5248 ins_encode %{ 5249 __ bswapl($dst$$Register); 5250 __ sarl($dst$$Register, 16); 5251 %} 5252 ins_pipe( ialu_reg ); 5253 %} 5254 5255 5256 //---------- Zeros Count Instructions ------------------------------------------ 5257 5258 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5259 predicate(UseCountLeadingZerosInstruction); 5260 match(Set dst (CountLeadingZerosI src)); 5261 effect(KILL cr); 5262 5263 format %{ "LZCNT $dst, $src\t# count 
leading zeros (int)" %} 5264 ins_encode %{ 5265 __ lzcntl($dst$$Register, $src$$Register); 5266 %} 5267 ins_pipe(ialu_reg); 5268 %} 5269 5270 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5271 predicate(!UseCountLeadingZerosInstruction); 5272 match(Set dst (CountLeadingZerosI src)); 5273 effect(KILL cr); 5274 5275 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5276 "JNZ skip\n\t" 5277 "MOV $dst, -1\n" 5278 "skip:\n\t" 5279 "NEG $dst\n\t" 5280 "ADD $dst, 31" %} 5281 ins_encode %{ 5282 Register Rdst = $dst$$Register; 5283 Register Rsrc = $src$$Register; 5284 Label skip; 5285 __ bsrl(Rdst, Rsrc); 5286 __ jccb(Assembler::notZero, skip); 5287 __ movl(Rdst, -1); 5288 __ bind(skip); 5289 __ negl(Rdst); 5290 __ addl(Rdst, BitsPerInt - 1); 5291 %} 5292 ins_pipe(ialu_reg); 5293 %} 5294 5295 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5296 predicate(UseCountLeadingZerosInstruction); 5297 match(Set dst (CountLeadingZerosL src)); 5298 effect(TEMP dst, KILL cr); 5299 5300 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5301 "JNC done\n\t" 5302 "LZCNT $dst, $src.lo\n\t" 5303 "ADD $dst, 32\n" 5304 "done:" %} 5305 ins_encode %{ 5306 Register Rdst = $dst$$Register; 5307 Register Rsrc = $src$$Register; 5308 Label done; 5309 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5310 __ jccb(Assembler::carryClear, done); 5311 __ lzcntl(Rdst, Rsrc); 5312 __ addl(Rdst, BitsPerInt); 5313 __ bind(done); 5314 %} 5315 ins_pipe(ialu_reg); 5316 %} 5317 5318 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5319 predicate(!UseCountLeadingZerosInstruction); 5320 match(Set dst (CountLeadingZerosL src)); 5321 effect(TEMP dst, KILL cr); 5322 5323 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5324 "JZ msw_is_zero\n\t" 5325 "ADD $dst, 32\n\t" 5326 "JMP not_zero\n" 5327 "msw_is_zero:\n\t" 5328 "BSR $dst, $src.lo\n\t" 5329 "JNZ not_zero\n\t" 5330 "MOV $dst, -1\n" 5331 "not_zero:\n\t" 5332 "NEG 
$dst\n\t" 5333 "ADD $dst, 63\n" %} 5334 ins_encode %{ 5335 Register Rdst = $dst$$Register; 5336 Register Rsrc = $src$$Register; 5337 Label msw_is_zero; 5338 Label not_zero; 5339 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5340 __ jccb(Assembler::zero, msw_is_zero); 5341 __ addl(Rdst, BitsPerInt); 5342 __ jmpb(not_zero); 5343 __ bind(msw_is_zero); 5344 __ bsrl(Rdst, Rsrc); 5345 __ jccb(Assembler::notZero, not_zero); 5346 __ movl(Rdst, -1); 5347 __ bind(not_zero); 5348 __ negl(Rdst); 5349 __ addl(Rdst, BitsPerLong - 1); 5350 %} 5351 ins_pipe(ialu_reg); 5352 %} 5353 5354 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5355 predicate(UseCountTrailingZerosInstruction); 5356 match(Set dst (CountTrailingZerosI src)); 5357 effect(KILL cr); 5358 5359 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5360 ins_encode %{ 5361 __ tzcntl($dst$$Register, $src$$Register); 5362 %} 5363 ins_pipe(ialu_reg); 5364 %} 5365 5366 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5367 predicate(!UseCountTrailingZerosInstruction); 5368 match(Set dst (CountTrailingZerosI src)); 5369 effect(KILL cr); 5370 5371 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5372 "JNZ done\n\t" 5373 "MOV $dst, 32\n" 5374 "done:" %} 5375 ins_encode %{ 5376 Register Rdst = $dst$$Register; 5377 Label done; 5378 __ bsfl(Rdst, $src$$Register); 5379 __ jccb(Assembler::notZero, done); 5380 __ movl(Rdst, BitsPerInt); 5381 __ bind(done); 5382 %} 5383 ins_pipe(ialu_reg); 5384 %} 5385 5386 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5387 predicate(UseCountTrailingZerosInstruction); 5388 match(Set dst (CountTrailingZerosL src)); 5389 effect(TEMP dst, KILL cr); 5390 5391 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5392 "JNC done\n\t" 5393 "TZCNT $dst, $src.hi\n\t" 5394 "ADD $dst, 32\n" 5395 "done:" %} 5396 ins_encode %{ 5397 Register Rdst = $dst$$Register; 5398 Register Rsrc = $src$$Register; 5399 Label done; 5400 __ 
tzcntl(Rdst, Rsrc); 5401 __ jccb(Assembler::carryClear, done); 5402 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5403 __ addl(Rdst, BitsPerInt); 5404 __ bind(done); 5405 %} 5406 ins_pipe(ialu_reg); 5407 %} 5408 5409 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5410 predicate(!UseCountTrailingZerosInstruction); 5411 match(Set dst (CountTrailingZerosL src)); 5412 effect(TEMP dst, KILL cr); 5413 5414 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5415 "JNZ done\n\t" 5416 "BSF $dst, $src.hi\n\t" 5417 "JNZ msw_not_zero\n\t" 5418 "MOV $dst, 32\n" 5419 "msw_not_zero:\n\t" 5420 "ADD $dst, 32\n" 5421 "done:" %} 5422 ins_encode %{ 5423 Register Rdst = $dst$$Register; 5424 Register Rsrc = $src$$Register; 5425 Label msw_not_zero; 5426 Label done; 5427 __ bsfl(Rdst, Rsrc); 5428 __ jccb(Assembler::notZero, done); 5429 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5430 __ jccb(Assembler::notZero, msw_not_zero); 5431 __ movl(Rdst, BitsPerInt); 5432 __ bind(msw_not_zero); 5433 __ addl(Rdst, BitsPerInt); 5434 __ bind(done); 5435 %} 5436 ins_pipe(ialu_reg); 5437 %} 5438 5439 5440 //---------- Population Count Instructions ------------------------------------- 5441 5442 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5443 predicate(UsePopCountInstruction); 5444 match(Set dst (PopCountI src)); 5445 effect(KILL cr); 5446 5447 format %{ "POPCNT $dst, $src" %} 5448 ins_encode %{ 5449 __ popcntl($dst$$Register, $src$$Register); 5450 %} 5451 ins_pipe(ialu_reg); 5452 %} 5453 5454 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5455 predicate(UsePopCountInstruction); 5456 match(Set dst (PopCountI (LoadI mem))); 5457 effect(KILL cr); 5458 5459 format %{ "POPCNT $dst, $mem" %} 5460 ins_encode %{ 5461 __ popcntl($dst$$Register, $mem$$Address); 5462 %} 5463 ins_pipe(ialu_reg); 5464 %} 5465 5466 // Note: Long.bitCount(long) returns an int. 
// Population count of a 64-bit value: sum the POPCNTs of the two halves.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Build low/high-word addresses by hand (disp and disp + 4).
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // After MOVSX8, bits 7..31 all hold the sign, so shifting right by 7
    // fills the high word with the sign bit.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);    // XOR of the high word clobbers the flags

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 bits of the mask are meaningful after the zero-extend.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    // (x << 24) >> 24 on a loaded short is just a sign-extending byte load.
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // Bits 15..31 already hold the sign after MOVSX, so a 15-bit shift
    // fills the high word with it.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    // Masking with 0xFF reduces to a zero-extending byte load.
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 bits of the mask are meaningful after the zero-extend.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);             // replicate the sign bit
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    // Mask is non-negative (31-bit), so the high word is simply zero.
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads; non-atomic, guarded by the predicate above.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load when SSE2 is available: one 64-bit MOVSD into
// an XMM temp, then spill to the destination stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As above, but the destination is a GPR pair: extract the low word with
// MOVD, shift the XMM temp right 32 bits, and extract the high word.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range (array length)
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 path)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Same load when the upper half should be preserved; movdbl() selects the
// MOVLPD encoding when UseXmmLoadAndClearUpper is off.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float: copy into the legacy (non-extended) XMM register class;
// the move is elided when source and destination allocate to the same reg.
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! if src != dst load float (4 bytes)" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float: inverse direction of MoveF2LEG.
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! if src != dst load float (4 bytes)" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}



// Load Float (x87 path)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address -- one variant per addressing-mode operand.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero -- XOR is shorter than MOV reg,0 but kills the flags.
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);               /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);               /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant: two 32-bit immediate moves, one per register half.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Long zero constant: XOR both halves (cheaper than two immediate moves).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
// Load an arbitrary float constant from the constant table onto the x87 stack
// and pop it into the destination FPU register.
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
// Float 0.0 is generated directly with FLDZ -- no constant-table entry.
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
// Float 1.0 is generated directly with FLD1 -- no constant-table entry.
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// SSE path: float constant loaded from the constant table with MOVSS.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// Float 0.0 via XORPS -- no memory access, no flag effects.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
// Load an arbitrary double constant from the constant table (x87 path).
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
// Double 0.0 via FLDZ -- no constant-table entry needed.
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
// Double 1.0 via FLD1 -- no constant-table entry needed.
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// SSE2 path: double constant loaded from the constant table with MOVSD.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Load Double: copy into the legacy XMM register class; the move is elided
// when source and destination allocate to the same register.
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! if src != dst load double (8 bytes)" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double: inverse direction of MoveD2LEG.
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! if src != dst load double (8 bytes)" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}


// The instruction usage is guarded by predicate in operand immD0().
// Double 0.0 via XORPD -- no memory access.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load long from a stack slot: two 32-bit loads, one per register half.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load float from a stack slot (x87): FLD then pop into the destination.
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load double from a stack slot (x87): FLD then pop into the destination.
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

// No SSE means no PREFETCH* instruction available: empty encoding.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// AllocatePrefetchInstr==3 selects 3DNow!/AMD PREFETCHW.
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte -- xRegI restricts the source to a byte-addressable register.
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short -- 0x66 operand-size prefix turns MOV r/m32 into 16-bit.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long (non-atomic): two 32-bit stores, low then high word.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer: only the low word is needed after ConvL2I.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.
// Has to probe the target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );  // the probing CMP clobbers the flags
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 path: source is a stack slot; bounce through an XMM temp so the
// 64-bit store is a single atomic MOVSD.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 path: source is a GPR pair; pack the two halves into one XMM register
// with PUNPCKLDQ, then store atomically with a single MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate -- guarded by UseStoreImmI16 because the 16-bit
// immediate store is slow on some processors.
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);               /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate (byte store into the card table)
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87 path); source must already be at top-of-stack (regDPR1).
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86: FST_D itself narrows to 64-bit
// precision, so the explicit RoundDouble node is absorbed.
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Double: copy into the vector-legal double register class.
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double: inverse direction of MoveD2VL.
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float: copy into the vector-legal float register class.
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float: inverse direction of MoveF2VL.
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store Float (x87 path); source must already be at top-of-stack (regFPR1).
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86: FST_S narrows to 32-bit precision,
// absorbing the explicit RoundFloat node.
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86: the 32-bit store also performs the
// double-to-float conversion, absorbing the ConvD2F node.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 -- store the raw float bit pattern */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot: two 32-bit stores, one per register half.
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors.  On x86 (TSO) most barriers need no machine
// instruction; only StoreLoad ordering requires a real fence.

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// StoreLoad barrier: emitted as a locked add to the top of stack, which is
// cheaper than MFENCE on most processors.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);  // LOCK ADDL sets the condition codes
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elide the barrier when a preceding locked instruction already provides
// the StoreLoad ordering (checked by post_store_load_barrier).
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// CastX2P is a no-op: operands are constrained to the same register (EAX),
// so only the register-allocator view changes, not the bits.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move, emulated with a short branch on pre-P6 processors
// that lack the CMOV instruction.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Emulated conditional move, unsigned compare flavor.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// True CMOV, signed compare.
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// True CMOV, unsigned compare.
instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Carry-flag-only unsigned compare: delegates to the unsigned CMOV form.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move with a memory source.
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move with a memory source, unsigned compare.
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move of a pointer.
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}
// Conditional move of a pointer, unsigned compare.
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move of a double on the x87 stack (FCMOV); destination is
// constrained to top-of-stack (regDPR1).  Unsigned compare flavor.
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move of a float on the x87 stack (FCMOV), unsigned compare.
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned;
// the signed case is therefore emulated with a conditional branch.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7082 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ 7083 predicate(UseSSE==0); 7084 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7085 ins_cost(200); 7086 format %{ "Jn$cop skip\n\t" 7087 "MOV $dst,$src\t# float\n" 7088 "skip:" %} 7089 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 7090 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); 7091 ins_pipe( pipe_cmovDPR_reg ); 7092 %} 7093 7094 // No CMOVE with SSE/SSE2 7095 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 7096 predicate (UseSSE>=1); 7097 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7098 ins_cost(200); 7099 format %{ "Jn$cop skip\n\t" 7100 "MOVSS $dst,$src\t# float\n" 7101 "skip:" %} 7102 ins_encode %{ 7103 Label skip; 7104 // Invert sense of branch from sense of CMOV 7105 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 7106 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7107 __ bind(skip); 7108 %} 7109 ins_pipe( pipe_slow ); 7110 %} 7111 7112 // No CMOVE with SSE/SSE2 7113 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 7114 predicate (UseSSE>=2); 7115 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7116 ins_cost(200); 7117 format %{ "Jn$cop skip\n\t" 7118 "MOVSD $dst,$src\t# float\n" 7119 "skip:" %} 7120 ins_encode %{ 7121 Label skip; 7122 // Invert sense of branch from sense of CMOV 7123 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 7124 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 7125 __ bind(skip); 7126 %} 7127 ins_pipe( pipe_slow ); 7128 %} 7129 7130 // unsigned version 7131 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 7132 predicate (UseSSE>=1); 7133 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7134 ins_cost(200); 7135 format %{ "Jn$cop skip\n\t" 7136 "MOVSS $dst,$src\t# float\n" 7137 "skip:" %} 7138 ins_encode %{ 7139 Label skip; 7140 // Invert sense of branch from sense of CMOV 7141 __ 
jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 7142 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7143 __ bind(skip); 7144 %} 7145 ins_pipe( pipe_slow ); 7146 %} 7147 7148 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 7149 predicate (UseSSE>=1); 7150 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7151 ins_cost(200); 7152 expand %{ 7153 fcmovF_regU(cop, cr, dst, src); 7154 %} 7155 %} 7156 7157 // unsigned version 7158 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 7159 predicate (UseSSE>=2); 7160 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7161 ins_cost(200); 7162 format %{ "Jn$cop skip\n\t" 7163 "MOVSD $dst,$src\t# float\n" 7164 "skip:" %} 7165 ins_encode %{ 7166 Label skip; 7167 // Invert sense of branch from sense of CMOV 7168 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 7169 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 7170 __ bind(skip); 7171 %} 7172 ins_pipe( pipe_slow ); 7173 %} 7174 7175 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 7176 predicate (UseSSE>=2); 7177 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7178 ins_cost(200); 7179 expand %{ 7180 fcmovD_regU(cop, cr, dst, src); 7181 %} 7182 %} 7183 7184 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ 7185 predicate(VM_Version::supports_cmov() ); 7186 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7187 ins_cost(200); 7188 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 7189 "CMOV$cop $dst.hi,$src.hi" %} 7190 opcode(0x0F,0x40); 7191 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7192 ins_pipe( pipe_cmov_reg_long ); 7193 %} 7194 7195 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ 7196 predicate(VM_Version::supports_cmov() ); 7197 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7198 ins_cost(200); 7199 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 7200 "CMOV$cop 
$dst.hi,$src.hi" %} 7201 opcode(0x0F,0x40); 7202 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7203 ins_pipe( pipe_cmov_reg_long ); 7204 %} 7205 7206 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ 7207 predicate(VM_Version::supports_cmov() ); 7208 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7209 ins_cost(200); 7210 expand %{ 7211 cmovL_regU(cop, cr, dst, src); 7212 %} 7213 %} 7214 7215 //----------Arithmetic Instructions-------------------------------------------- 7216 //----------Addition Instructions---------------------------------------------- 7217 7218 // Integer Addition Instructions 7219 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7220 match(Set dst (AddI dst src)); 7221 effect(KILL cr); 7222 7223 size(2); 7224 format %{ "ADD $dst,$src" %} 7225 opcode(0x03); 7226 ins_encode( OpcP, RegReg( dst, src) ); 7227 ins_pipe( ialu_reg_reg ); 7228 %} 7229 7230 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7231 match(Set dst (AddI dst src)); 7232 effect(KILL cr); 7233 7234 format %{ "ADD $dst,$src" %} 7235 opcode(0x81, 0x00); /* /0 id */ 7236 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7237 ins_pipe( ialu_reg ); 7238 %} 7239 7240 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{ 7241 predicate(UseIncDec); 7242 match(Set dst (AddI dst src)); 7243 effect(KILL cr); 7244 7245 size(1); 7246 format %{ "INC $dst" %} 7247 opcode(0x40); /* */ 7248 ins_encode( Opc_plus( primary, dst ) ); 7249 ins_pipe( ialu_reg ); 7250 %} 7251 7252 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 7253 match(Set dst (AddI src0 src1)); 7254 ins_cost(110); 7255 7256 format %{ "LEA $dst,[$src0 + $src1]" %} 7257 opcode(0x8D); /* 0x8D /r */ 7258 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7259 ins_pipe( ialu_reg_reg ); 7260 %} 7261 7262 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 7263 match(Set dst (AddP src0 src1)); 7264 ins_cost(110); 
7265 7266 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 7267 opcode(0x8D); /* 0x8D /r */ 7268 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7269 ins_pipe( ialu_reg_reg ); 7270 %} 7271 7272 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 7273 predicate(UseIncDec); 7274 match(Set dst (AddI dst src)); 7275 effect(KILL cr); 7276 7277 size(1); 7278 format %{ "DEC $dst" %} 7279 opcode(0x48); /* */ 7280 ins_encode( Opc_plus( primary, dst ) ); 7281 ins_pipe( ialu_reg ); 7282 %} 7283 7284 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 7285 match(Set dst (AddP dst src)); 7286 effect(KILL cr); 7287 7288 size(2); 7289 format %{ "ADD $dst,$src" %} 7290 opcode(0x03); 7291 ins_encode( OpcP, RegReg( dst, src) ); 7292 ins_pipe( ialu_reg_reg ); 7293 %} 7294 7295 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 7296 match(Set dst (AddP dst src)); 7297 effect(KILL cr); 7298 7299 format %{ "ADD $dst,$src" %} 7300 opcode(0x81,0x00); /* Opcode 81 /0 id */ 7301 // ins_encode( RegImm( dst, src) ); 7302 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7303 ins_pipe( ialu_reg ); 7304 %} 7305 7306 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7307 match(Set dst (AddI dst (LoadI src))); 7308 effect(KILL cr); 7309 7310 ins_cost(125); 7311 format %{ "ADD $dst,$src" %} 7312 opcode(0x03); 7313 ins_encode( OpcP, RegMem( dst, src) ); 7314 ins_pipe( ialu_reg_mem ); 7315 %} 7316 7317 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7318 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7319 effect(KILL cr); 7320 7321 ins_cost(150); 7322 format %{ "ADD $dst,$src" %} 7323 opcode(0x01); /* Opcode 01 /r */ 7324 ins_encode( OpcP, RegMem( src, dst ) ); 7325 ins_pipe( ialu_mem_reg ); 7326 %} 7327 7328 // Add Memory with Immediate 7329 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7330 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7331 effect(KILL cr); 7332 7333 ins_cost(125); 7334 format %{ "ADD $dst,$src" %} 7335 
opcode(0x81); /* Opcode 81 /0 id */ 7336 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7337 ins_pipe( ialu_mem_imm ); 7338 %} 7339 7340 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7341 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7342 effect(KILL cr); 7343 7344 ins_cost(125); 7345 format %{ "INC $dst" %} 7346 opcode(0xFF); /* Opcode FF /0 */ 7347 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7348 ins_pipe( ialu_mem_imm ); 7349 %} 7350 7351 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7352 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7353 effect(KILL cr); 7354 7355 ins_cost(125); 7356 format %{ "DEC $dst" %} 7357 opcode(0xFF); /* Opcode FF /1 */ 7358 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7359 ins_pipe( ialu_mem_imm ); 7360 %} 7361 7362 7363 instruct checkCastPP( eRegP dst ) %{ 7364 match(Set dst (CheckCastPP dst)); 7365 7366 size(0); 7367 format %{ "#checkcastPP of $dst" %} 7368 ins_encode( /*empty encoding*/ ); 7369 ins_pipe( empty ); 7370 %} 7371 7372 instruct castPP( eRegP dst ) %{ 7373 match(Set dst (CastPP dst)); 7374 format %{ "#castPP of $dst" %} 7375 ins_encode( /*empty encoding*/ ); 7376 ins_pipe( empty ); 7377 %} 7378 7379 instruct castII( rRegI dst ) %{ 7380 match(Set dst (CastII dst)); 7381 format %{ "#castII of $dst" %} 7382 ins_encode( /*empty encoding*/ ); 7383 ins_cost(0); 7384 ins_pipe( empty ); 7385 %} 7386 7387 7388 // Load-locked - same as a regular pointer load when used with compare-swap 7389 instruct loadPLocked(eRegP dst, memory mem) %{ 7390 match(Set dst (LoadPLocked mem)); 7391 7392 ins_cost(125); 7393 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7394 opcode(0x8B); 7395 ins_encode( OpcP, RegMem(dst,mem)); 7396 ins_pipe( ialu_reg_mem ); 7397 %} 7398 7399 // Conditional-store of the updated heap-top. 7400 // Used during allocation of the shared heap. 7401 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 
// LOCK CMPXCHG of the heap-top pointer; oldval is pinned to EAX as CMPXCHG requires.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS: requires CMPXCHG8B support; oldval in EDX:EAX, newval in ECX:EBX.
// Result register gets the success boolean derived from ZF.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS; oldval pinned to EAX per the CMPXCHG contract.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte CAS (CMPXCHGB).
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Short CAS (CMPXCHGW).
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int CAS (CMPXCHG).
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange variants return the witnessed value (left in oldval's
// fixed registers by CMPXCHG) rather than a boolean; only flags are killed.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddB whose result is unused: emit a cheaper LOCK ADDB instead of XADD.
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
// Atomic fetch-and-add of a byte via LOCK XADDB; newval must be a
// byte-addressable register (xRegI), per the comment above.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddS with unused result: cheaper LOCK ADDW.
instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add of a short via LOCK XADDW.
instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI with unused result: cheaper LOCK ADDL.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add of an int via LOCK XADDL.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
// Atomic swap of a byte. XCHG with a memory operand is implicitly locked,
// so no explicit lock() is emitted; flags are not affected (no KILL cr).
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic swap of a short.
instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic swap of an int.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic swap of a pointer (32-bit, so same XCHGL as the int form).
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// SUB register, immediate.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// reg -= memory operand (load folded into SUB).
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// memory -= reg (read-modify-write).
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
// Matches ptr + (0 - src), i.e. a pointer decrement, as a single SUB.
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// 0 - dst matched as a one-instruction NEG.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low half (EAX) of the EDX:EAX pair only;
// used as a feeder for the high-multiply instructions below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
//  (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Predicate walks the matched subtree to require a constant long multiplier
  // whose value fits in a signed 32-bit range.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Same constant-multiplier constraint as mulI_imm_high above.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// dst*src1 + src2*src3, expanded into two IMULs and an ADD; src2 is clobbered
// by the second multiply (KILL src2).
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned int * unsigned int -> long (both operands zero-extended).
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// Special-cases min_jint / -1, which would trap in IDIV; that path yields
// EDX=0 and skips the IDIV entirely (see the JE,s done branch).
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
    "normal: CDQ\n\t"
            "IDIV $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// Long division is punted to the SharedRuntime::ldiv runtime call.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
    "normal: CDQ\n\t"
            "IDIV $div\n\t"
    "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// Long remainder is punted to the SharedRuntime::lrem runtime call.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
// Long divide by a constant that fits in 32 bits.  The matcher guarantees
// (see the assert below) the divisor is not 0, -1, or min_jint, so the
// IDIV overflow case cannot occur.  Strategy: divide |imm| into the
// unsigned magnitude of EDX:EAX with two chained 32-bit DIVs (high half
// first, its remainder feeding the low-half divide), then fix up the sign
// from the original operand signs.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;   // |divisor|
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    // If |divisor| > high word, the whole quotient fits in 32 bits.
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);            // divide the high half first
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);            // then the low half (EDX carries remainder)
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      // negative divisor flips the quotient's sign
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
// Same chained-DIV scheme as divL_eReg_imm32, but the remainder (left in
// EDX by the final DIV) is kept instead of the quotient; a negative
// dividend negates the remainder, and SAR by 31 sign-extends it into the
// high word of the result.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;   // |divisor|; remainder sign follows the dividend only
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);            // high half
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);            // low half; remainder in EDX
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
// D1 /4 is the shift-by-1 form: one byte shorter than the C1 imm8 form.
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4);  /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
// Shift count must be in CL (eCXRegI operand) for the D3 form.
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4);  /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
// Memory form: shifts the in-memory value in place (read-modify-write).
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
// NOTE(review): pipe class is ialu_mem_imm although this is the register
// form — matches the original; confirm intent before changing.
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Shift Left by 24, followed by Arithmetic Shift Right by 24
// (sign-extend the low byte).  This idiom is used by the compiler for the
// i2b bytecode; it is matched as a single MOVSX.  src must be a
// byte-addressable register (xRegI: EAX/EBX/ECX/EDX).
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by 16, followed by Arithmetic Shift Right by 16
// (sign-extend the low 16 bits).  This idiom is used by the compiler for
// the i2s bytecode.
// Matched as a single MOVSX (16-bit sign extension); see the i2b pattern above.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
// Shift count in CL (eCXRegI operand).
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
// OpcSErm/Con8or32 pick the sign-extended imm8 form when the constant fits.
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
// Read-modify-write form: matched Load/And/Store of the same address.
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = (~src1) & src2, matched from the (AndI (XorI src1 -1) src2)
// ideal subtree.  Only selected when UseBMI1Instructions is on.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// ANDN with a memory second operand.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from (AndI (SubI 0 src) src).
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to (and including) lowest set bit,
// matched from (XorI (AddI src -1) src).
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matched from (AndI (AddI src -1) src).
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or with a pointer reinterpreted as an int (CastP2X); same OR encoding.
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// These three have no match rule: they are expand-only building blocks
// used by the rotate match rules below.
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate checks the two shift counts sum to 0 mod 32, i.e. the
// (x << n) | (x >>> (32-n)) rotate idiom.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
// Expand-only building blocks, mirroring the ROL set above.
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Same sum-mod-32 predicate as the ROL imm8 rule.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// XOR with -1 is complement; emitted as NOT, which (unlike XOR) does not
// write EFLAGS — hence no eFlagsReg effect here.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Expand-only copy; paired with ci2b/cp2b below by the Conv2B rules.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG sets CF iff dst was non-zero; ADC then computes -src + src + CF,
// i.e. 1 for a non-zero src and 0 for zero (dst holds a copy of src).
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B of an int: copy then NEG/ADC (see ci2b).
instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer flavor of ci2b (same NEG/ADC trick).
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B of a pointer.
instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// CmpLTMask: dst = (p < q) ? -1 : 0, built from SETlt (writes one byte,
// hence the byte-addressable eCXRegI destination) followed by NEG.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    Label done;   // NOTE(review): this label is never bound or used here
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: SAR by 31 smears the sign bit over the word.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// Fused p = (p-q) + ((p<q) ? y : 0): SUB sets flags, branch skips the ADD
// when p >= q.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// Fused y = (p<q) ? y : 0: keep y when p < q, zero it otherwise.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These produce only a flags result (OF consumed by the overflow branch);
// the ADD/NEG/IMUL variants destroy an input register (USE_KILL).

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// CMP sets the same flags as SUB without writing a register, so the
// subtract overflow checks need no kill.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 0 - x is emitted as NEG, which overflows exactly for min_jint.
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Three-operand IMUL writes a scratch register, leaving both inputs live.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// 64-bit arithmetic on x86_32 is done on register pairs: low halves first,
// then high halves with the carry/borrow chained through ADC/SBB.

// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
// Low halves subtract with SUB, high halves with SBB to chain the borrow.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Long negate: 0 - dst, via the neg_long encoding helper
// (NEG hi; NEG lo; SBB hi,0 per the format string).
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
// Bitwise ops have no carry to chain; the two halves are independent ANDs.
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long ANDN: two 32-bit ANDNs, one per register-pair half.
// TEMP dst keeps the allocator from aliasing dst with the sources.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

// Long ANDN with a memory second operand; src2_hi addresses the upper word.
instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSI (isolate lowest set bit).  BLSI on the low word suffices when
// it finds a set bit (result non-zero per the JNZ in the format string);
// only if the low word is all zero does the lowest set bit live in the
// high word, handled by the second BLSIL.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long BLSI from memory: same low-word-first scheme as the register form.
instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSMSK (mask up to lowest set bit); body continues past this chunk.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
9243 Register Rdst = $dst$$Register; 9244 Register Rsrc = $src$$Register; 9245 __ movl(HIGH_FROM_LOW(Rdst), 0); 9246 __ blsmskl(Rdst, Rsrc); 9247 __ jccb(Assembler::carryClear, done); 9248 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9249 __ bind(done); 9250 %} 9251 9252 ins_pipe(ialu_reg); 9253 %} 9254 9255 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9256 %{ 9257 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 9258 predicate(UseBMI1Instructions); 9259 effect(KILL cr, TEMP dst); 9260 9261 ins_cost(125); 9262 format %{ "MOVL $dst.hi, 0\n\t" 9263 "BLSMSKL $dst.lo, $src\n\t" 9264 "JNC done\n\t" 9265 "BLSMSKL $dst.hi, $src+4\n" 9266 "done:" 9267 %} 9268 9269 ins_encode %{ 9270 Label done; 9271 Register Rdst = $dst$$Register; 9272 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9273 9274 __ movl(HIGH_FROM_LOW(Rdst), 0); 9275 __ blsmskl(Rdst, $src$$Address); 9276 __ jccb(Assembler::carryClear, done); 9277 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 9278 __ bind(done); 9279 %} 9280 9281 ins_pipe(ialu_reg_mem); 9282 %} 9283 9284 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9285 %{ 9286 match(Set dst (AndL (AddL src minus_1) src) ); 9287 predicate(UseBMI1Instructions); 9288 effect(KILL cr, TEMP dst); 9289 9290 format %{ "MOVL $dst.hi, $src.hi\n\t" 9291 "BLSRL $dst.lo, $src.lo\n\t" 9292 "JNC done\n\t" 9293 "BLSRL $dst.hi, $src.hi\n" 9294 "done:" 9295 %} 9296 9297 ins_encode %{ 9298 Label done; 9299 Register Rdst = $dst$$Register; 9300 Register Rsrc = $src$$Register; 9301 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9302 __ blsrl(Rdst, Rsrc); 9303 __ jccb(Assembler::carryClear, done); 9304 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9305 __ bind(done); 9306 %} 9307 9308 ins_pipe(ialu_reg); 9309 %} 9310 9311 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9312 %{ 9313 match(Set dst (AndL 
(AddL (LoadL src) minus_1) (LoadL src) )); 9314 predicate(UseBMI1Instructions); 9315 effect(KILL cr, TEMP dst); 9316 9317 ins_cost(125); 9318 format %{ "MOVL $dst.hi, $src+4\n\t" 9319 "BLSRL $dst.lo, $src\n\t" 9320 "JNC done\n\t" 9321 "BLSRL $dst.hi, $src+4\n" 9322 "done:" 9323 %} 9324 9325 ins_encode %{ 9326 Label done; 9327 Register Rdst = $dst$$Register; 9328 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9329 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9330 __ blsrl(Rdst, $src$$Address); 9331 __ jccb(Assembler::carryClear, done); 9332 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9333 __ bind(done); 9334 %} 9335 9336 ins_pipe(ialu_reg_mem); 9337 %} 9338 9339 // Or Long Register with Register 9340 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9341 match(Set dst (OrL dst src)); 9342 effect(KILL cr); 9343 format %{ "OR $dst.lo,$src.lo\n\t" 9344 "OR $dst.hi,$src.hi" %} 9345 opcode(0x0B,0x0B); 9346 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9347 ins_pipe( ialu_reg_reg_long ); 9348 %} 9349 9350 // Or Long Register with Immediate 9351 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9352 match(Set dst (OrL dst src)); 9353 effect(KILL cr); 9354 format %{ "OR $dst.lo,$src.lo\n\t" 9355 "OR $dst.hi,$src.hi" %} 9356 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9357 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9358 ins_pipe( ialu_reg_long ); 9359 %} 9360 9361 // Or Long Register with Memory 9362 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9363 match(Set dst (OrL dst (LoadL mem))); 9364 effect(KILL cr); 9365 ins_cost(125); 9366 format %{ "OR $dst.lo,$mem\n\t" 9367 "OR $dst.hi,$mem+4" %} 9368 opcode(0x0B,0x0B); 9369 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9370 ins_pipe( ialu_reg_long_mem ); 9371 %} 9372 9373 // Xor Long Register with Register 9374 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9375 
match(Set dst (XorL dst src)); 9376 effect(KILL cr); 9377 format %{ "XOR $dst.lo,$src.lo\n\t" 9378 "XOR $dst.hi,$src.hi" %} 9379 opcode(0x33,0x33); 9380 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9381 ins_pipe( ialu_reg_reg_long ); 9382 %} 9383 9384 // Xor Long Register with Immediate -1 9385 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9386 match(Set dst (XorL dst imm)); 9387 format %{ "NOT $dst.lo\n\t" 9388 "NOT $dst.hi" %} 9389 ins_encode %{ 9390 __ notl($dst$$Register); 9391 __ notl(HIGH_FROM_LOW($dst$$Register)); 9392 %} 9393 ins_pipe( ialu_reg_long ); 9394 %} 9395 9396 // Xor Long Register with Immediate 9397 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9398 match(Set dst (XorL dst src)); 9399 effect(KILL cr); 9400 format %{ "XOR $dst.lo,$src.lo\n\t" 9401 "XOR $dst.hi,$src.hi" %} 9402 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9403 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9404 ins_pipe( ialu_reg_long ); 9405 %} 9406 9407 // Xor Long Register with Memory 9408 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9409 match(Set dst (XorL dst (LoadL mem))); 9410 effect(KILL cr); 9411 ins_cost(125); 9412 format %{ "XOR $dst.lo,$mem\n\t" 9413 "XOR $dst.hi,$mem+4" %} 9414 opcode(0x33,0x33); 9415 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9416 ins_pipe( ialu_reg_long_mem ); 9417 %} 9418 9419 // Shift Left Long by 1 9420 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9421 predicate(UseNewLongLShift); 9422 match(Set dst (LShiftL dst cnt)); 9423 effect(KILL cr); 9424 ins_cost(100); 9425 format %{ "ADD $dst.lo,$dst.lo\n\t" 9426 "ADC $dst.hi,$dst.hi" %} 9427 ins_encode %{ 9428 __ addl($dst$$Register,$dst$$Register); 9429 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9430 %} 9431 ins_pipe( ialu_reg_long ); 9432 %} 9433 9434 // Shift Left Long by 2 9435 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9436 
predicate(UseNewLongLShift); 9437 match(Set dst (LShiftL dst cnt)); 9438 effect(KILL cr); 9439 ins_cost(100); 9440 format %{ "ADD $dst.lo,$dst.lo\n\t" 9441 "ADC $dst.hi,$dst.hi\n\t" 9442 "ADD $dst.lo,$dst.lo\n\t" 9443 "ADC $dst.hi,$dst.hi" %} 9444 ins_encode %{ 9445 __ addl($dst$$Register,$dst$$Register); 9446 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9447 __ addl($dst$$Register,$dst$$Register); 9448 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9449 %} 9450 ins_pipe( ialu_reg_long ); 9451 %} 9452 9453 // Shift Left Long by 3 9454 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9455 predicate(UseNewLongLShift); 9456 match(Set dst (LShiftL dst cnt)); 9457 effect(KILL cr); 9458 ins_cost(100); 9459 format %{ "ADD $dst.lo,$dst.lo\n\t" 9460 "ADC $dst.hi,$dst.hi\n\t" 9461 "ADD $dst.lo,$dst.lo\n\t" 9462 "ADC $dst.hi,$dst.hi\n\t" 9463 "ADD $dst.lo,$dst.lo\n\t" 9464 "ADC $dst.hi,$dst.hi" %} 9465 ins_encode %{ 9466 __ addl($dst$$Register,$dst$$Register); 9467 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9468 __ addl($dst$$Register,$dst$$Register); 9469 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9470 __ addl($dst$$Register,$dst$$Register); 9471 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9472 %} 9473 ins_pipe( ialu_reg_long ); 9474 %} 9475 9476 // Shift Left Long by 1-31 9477 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9478 match(Set dst (LShiftL dst cnt)); 9479 effect(KILL cr); 9480 ins_cost(200); 9481 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9482 "SHL $dst.lo,$cnt" %} 9483 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9484 ins_encode( move_long_small_shift(dst,cnt) ); 9485 ins_pipe( ialu_reg_long ); 9486 %} 9487 9488 // Shift Left Long by 32-63 9489 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9490 match(Set dst (LShiftL dst cnt)); 9491 effect(KILL cr); 9492 ins_cost(300); 9493 
format %{ "MOV $dst.hi,$dst.lo\n" 9494 "\tSHL $dst.hi,$cnt-32\n" 9495 "\tXOR $dst.lo,$dst.lo" %} 9496 opcode(0xC1, 0x4); /* C1 /4 ib */ 9497 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9498 ins_pipe( ialu_reg_long ); 9499 %} 9500 9501 // Shift Left Long by variable 9502 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9503 match(Set dst (LShiftL dst shift)); 9504 effect(KILL cr); 9505 ins_cost(500+200); 9506 size(17); 9507 format %{ "TEST $shift,32\n\t" 9508 "JEQ,s small\n\t" 9509 "MOV $dst.hi,$dst.lo\n\t" 9510 "XOR $dst.lo,$dst.lo\n" 9511 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9512 "SHL $dst.lo,$shift" %} 9513 ins_encode( shift_left_long( dst, shift ) ); 9514 ins_pipe( pipe_slow ); 9515 %} 9516 9517 // Shift Right Long by 1-31 9518 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9519 match(Set dst (URShiftL dst cnt)); 9520 effect(KILL cr); 9521 ins_cost(200); 9522 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9523 "SHR $dst.hi,$cnt" %} 9524 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9525 ins_encode( move_long_small_shift(dst,cnt) ); 9526 ins_pipe( ialu_reg_long ); 9527 %} 9528 9529 // Shift Right Long by 32-63 9530 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9531 match(Set dst (URShiftL dst cnt)); 9532 effect(KILL cr); 9533 ins_cost(300); 9534 format %{ "MOV $dst.lo,$dst.hi\n" 9535 "\tSHR $dst.lo,$cnt-32\n" 9536 "\tXOR $dst.hi,$dst.hi" %} 9537 opcode(0xC1, 0x5); /* C1 /5 ib */ 9538 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9539 ins_pipe( ialu_reg_long ); 9540 %} 9541 9542 // Shift Right Long by variable 9543 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9544 match(Set dst (URShiftL dst shift)); 9545 effect(KILL cr); 9546 ins_cost(600); 9547 size(17); 9548 format %{ "TEST $shift,32\n\t" 9549 "JEQ,s small\n\t" 9550 "MOV $dst.lo,$dst.hi\n\t" 9551 "XOR $dst.hi,$dst.hi\n" 9552 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9553 "SHR $dst.hi,$shift" %} 9554 ins_encode( 
shift_right_long( dst, shift ) ); 9555 ins_pipe( pipe_slow ); 9556 %} 9557 9558 // Shift Right Long by 1-31 9559 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9560 match(Set dst (RShiftL dst cnt)); 9561 effect(KILL cr); 9562 ins_cost(200); 9563 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9564 "SAR $dst.hi,$cnt" %} 9565 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9566 ins_encode( move_long_small_shift(dst,cnt) ); 9567 ins_pipe( ialu_reg_long ); 9568 %} 9569 9570 // Shift Right Long by 32-63 9571 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9572 match(Set dst (RShiftL dst cnt)); 9573 effect(KILL cr); 9574 ins_cost(300); 9575 format %{ "MOV $dst.lo,$dst.hi\n" 9576 "\tSAR $dst.lo,$cnt-32\n" 9577 "\tSAR $dst.hi,31" %} 9578 opcode(0xC1, 0x7); /* C1 /7 ib */ 9579 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9580 ins_pipe( ialu_reg_long ); 9581 %} 9582 9583 // Shift Right arithmetic Long by variable 9584 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9585 match(Set dst (RShiftL dst shift)); 9586 effect(KILL cr); 9587 ins_cost(600); 9588 size(18); 9589 format %{ "TEST $shift,32\n\t" 9590 "JEQ,s small\n\t" 9591 "MOV $dst.lo,$dst.hi\n\t" 9592 "SAR $dst.hi,31\n" 9593 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9594 "SAR $dst.hi,$shift" %} 9595 ins_encode( shift_right_arith_long( dst, shift ) ); 9596 ins_pipe( pipe_slow ); 9597 %} 9598 9599 9600 //----------Double Instructions------------------------------------------------ 9601 // Double Math 9602 9603 // Compare & branch 9604 9605 // P6 version of float compare, sets condition codes in EFLAGS 9606 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9607 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9608 match(Set cr (CmpD src1 src2)); 9609 effect(KILL rax); 9610 ins_cost(150); 9611 format %{ "FLD $src1\n\t" 9612 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9613 "JNP exit\n\t" 9614 "MOV ah,1 // saw a NaN, set CF\n\t" 9615 
"SAHF\n" 9616 "exit:\tNOP // avoid branch to branch" %} 9617 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9618 ins_encode( Push_Reg_DPR(src1), 9619 OpcP, RegOpc(src2), 9620 cmpF_P6_fixup ); 9621 ins_pipe( pipe_slow ); 9622 %} 9623 9624 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9625 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9626 match(Set cr (CmpD src1 src2)); 9627 ins_cost(150); 9628 format %{ "FLD $src1\n\t" 9629 "FUCOMIP ST,$src2 // P6 instruction" %} 9630 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9631 ins_encode( Push_Reg_DPR(src1), 9632 OpcP, RegOpc(src2)); 9633 ins_pipe( pipe_slow ); 9634 %} 9635 9636 // Compare & branch 9637 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9638 predicate(UseSSE<=1); 9639 match(Set cr (CmpD src1 src2)); 9640 effect(KILL rax); 9641 ins_cost(200); 9642 format %{ "FLD $src1\n\t" 9643 "FCOMp $src2\n\t" 9644 "FNSTSW AX\n\t" 9645 "TEST AX,0x400\n\t" 9646 "JZ,s flags\n\t" 9647 "MOV AH,1\t# unordered treat as LT\n" 9648 "flags:\tSAHF" %} 9649 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9650 ins_encode( Push_Reg_DPR(src1), 9651 OpcP, RegOpc(src2), 9652 fpu_flags); 9653 ins_pipe( pipe_slow ); 9654 %} 9655 9656 // Compare vs zero into -1,0,1 9657 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9658 predicate(UseSSE<=1); 9659 match(Set dst (CmpD3 src1 zero)); 9660 effect(KILL cr, KILL rax); 9661 ins_cost(280); 9662 format %{ "FTSTD $dst,$src1" %} 9663 opcode(0xE4, 0xD9); 9664 ins_encode( Push_Reg_DPR(src1), 9665 OpcS, OpcP, PopFPU, 9666 CmpF_Result(dst)); 9667 ins_pipe( pipe_slow ); 9668 %} 9669 9670 // Compare into -1,0,1 9671 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9672 predicate(UseSSE<=1); 9673 match(Set dst (CmpD3 src1 src2)); 9674 effect(KILL cr, KILL rax); 9675 ins_cost(300); 9676 format %{ "FCMPD $dst,$src1,$src2" %} 9677 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9678 ins_encode( 
Push_Reg_DPR(src1), 9679 OpcP, RegOpc(src2), 9680 CmpF_Result(dst)); 9681 ins_pipe( pipe_slow ); 9682 %} 9683 9684 // float compare and set condition codes in EFLAGS by XMM regs 9685 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9686 predicate(UseSSE>=2); 9687 match(Set cr (CmpD src1 src2)); 9688 ins_cost(145); 9689 format %{ "UCOMISD $src1,$src2\n\t" 9690 "JNP,s exit\n\t" 9691 "PUSHF\t# saw NaN, set CF\n\t" 9692 "AND [rsp], #0xffffff2b\n\t" 9693 "POPF\n" 9694 "exit:" %} 9695 ins_encode %{ 9696 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9697 emit_cmpfp_fixup(_masm); 9698 %} 9699 ins_pipe( pipe_slow ); 9700 %} 9701 9702 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9703 predicate(UseSSE>=2); 9704 match(Set cr (CmpD src1 src2)); 9705 ins_cost(100); 9706 format %{ "UCOMISD $src1,$src2" %} 9707 ins_encode %{ 9708 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9709 %} 9710 ins_pipe( pipe_slow ); 9711 %} 9712 9713 // float compare and set condition codes in EFLAGS by XMM regs 9714 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9715 predicate(UseSSE>=2); 9716 match(Set cr (CmpD src1 (LoadD src2))); 9717 ins_cost(145); 9718 format %{ "UCOMISD $src1,$src2\n\t" 9719 "JNP,s exit\n\t" 9720 "PUSHF\t# saw NaN, set CF\n\t" 9721 "AND [rsp], #0xffffff2b\n\t" 9722 "POPF\n" 9723 "exit:" %} 9724 ins_encode %{ 9725 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9726 emit_cmpfp_fixup(_masm); 9727 %} 9728 ins_pipe( pipe_slow ); 9729 %} 9730 9731 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9732 predicate(UseSSE>=2); 9733 match(Set cr (CmpD src1 (LoadD src2))); 9734 ins_cost(100); 9735 format %{ "UCOMISD $src1,$src2" %} 9736 ins_encode %{ 9737 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9738 %} 9739 ins_pipe( pipe_slow ); 9740 %} 9741 9742 // Compare into -1,0,1 in XMM 9743 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9744 predicate(UseSSE>=2); 9745 match(Set dst (CmpD3 src1 src2)); 
9746 effect(KILL cr); 9747 ins_cost(255); 9748 format %{ "UCOMISD $src1, $src2\n\t" 9749 "MOV $dst, #-1\n\t" 9750 "JP,s done\n\t" 9751 "JB,s done\n\t" 9752 "SETNE $dst\n\t" 9753 "MOVZB $dst, $dst\n" 9754 "done:" %} 9755 ins_encode %{ 9756 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9757 emit_cmpfp3(_masm, $dst$$Register); 9758 %} 9759 ins_pipe( pipe_slow ); 9760 %} 9761 9762 // Compare into -1,0,1 in XMM and memory 9763 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9764 predicate(UseSSE>=2); 9765 match(Set dst (CmpD3 src1 (LoadD src2))); 9766 effect(KILL cr); 9767 ins_cost(275); 9768 format %{ "UCOMISD $src1, $src2\n\t" 9769 "MOV $dst, #-1\n\t" 9770 "JP,s done\n\t" 9771 "JB,s done\n\t" 9772 "SETNE $dst\n\t" 9773 "MOVZB $dst, $dst\n" 9774 "done:" %} 9775 ins_encode %{ 9776 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9777 emit_cmpfp3(_masm, $dst$$Register); 9778 %} 9779 ins_pipe( pipe_slow ); 9780 %} 9781 9782 9783 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9784 predicate (UseSSE <=1); 9785 match(Set dst (SubD dst src)); 9786 9787 format %{ "FLD $src\n\t" 9788 "DSUBp $dst,ST" %} 9789 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9790 ins_cost(150); 9791 ins_encode( Push_Reg_DPR(src), 9792 OpcP, RegOpc(dst) ); 9793 ins_pipe( fpu_reg_reg ); 9794 %} 9795 9796 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9797 predicate (UseSSE <=1); 9798 match(Set dst (RoundDouble (SubD src1 src2))); 9799 ins_cost(250); 9800 9801 format %{ "FLD $src2\n\t" 9802 "DSUB ST,$src1\n\t" 9803 "FSTP_D $dst\t# D-round" %} 9804 opcode(0xD8, 0x5); 9805 ins_encode( Push_Reg_DPR(src2), 9806 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9807 ins_pipe( fpu_mem_reg_reg ); 9808 %} 9809 9810 9811 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9812 predicate (UseSSE <=1); 9813 match(Set dst (SubD dst (LoadD src))); 9814 ins_cost(150); 9815 9816 format %{ "FLD $src\n\t" 9817 "DSUBp $dst,ST" %} 9818 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Absolute value of a double.  FABS operates on the x87 top-of-stack,
// hence both operands are constrained to FPR1 (TOS).
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Negate a double.  FCHS flips the sign bit of the x87 top-of-stack,
// hence both operands are constrained to FPR1 (TOS).
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Add Double, register-register (x87 form): push src, add-and-pop into dst.
instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Add Double with result rounded (stored) to a stack slot.
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Add Double with a memory operand (cisc form: FLD from memory, add-and-pop).
instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory: load, add src, store back to the same address
// (matches the Store(RoundDouble(Add(Load,...))) shape).
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}

// Add the constant 1.0 using FLD1 (no constant-table load needed).
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// Add a double constant from the constant table.  The predicate excludes
// 0.0 and 1.0 — presumably those are matched by cheaper dedicated rules
// (1.0 by addDPR_reg_imm1 above); TODO confirm 0.0 handling elsewhere.
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Add a double constant and round the result to a stack slot.
instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}

// Multiply Double, register-register (x87 form).
instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe(
fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (MulD dst src));
  ins_cost(1); // Select this instruction for all strict FP double multiplies

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Multiply by a double constant from the constant table.  The predicate
// excludes 0.0 and 1.0 — presumably those fold away or use cheaper rules;
// TODO confirm against the matcher.
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


// Multiply Double with a memory operand.
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}


// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
// Fused (src0 * src1) - src2, result back into src2 (reverse-subtract FSUBRp).
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


// Divide Double, register-register (x87 form).
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

//
// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Fix: this instruct previously carried TWO predicate() clauses — a stray
  // plain "UseSSE<=1" in addition to the combined strict-mode one below.
  // Depending on which clause ADLC honored, the subnormal-biasing divide
  // could be selected for non-strict methods.  Keep only the combined
  // predicate, mirroring strictfp_mulDPR_reg above.
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(01); // select this instruction for all strict FP double divides

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Divide Double and round the result to a stack slot (non-strict only;
// the strict case is handled by strictfp_divDPR_reg above).
instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD $src1\n\t"
            "FDIV ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Double remainder (drem) via the x87 partial-remainder loop.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct
modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  // SSE2 double modulus: spill both operands, loop on FPREM until the C2
  // flag (bit 0x400 via FNSTSW/SAHF -> PF) clears, then reload into XMM.
  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

// FPU arc-tangent (opcode D9 F3 is FPATAN); "DATA" mnemonic is display-only.
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
// fcompp();
// fwait(); fnstsw_ax();
// sahf();
// movl(dst, unordered_result);
// jcc(Assembler::parity, exit);
// movl(dst, less_result);
// jcc(Assembler::below, exit);
// movl(dst, equal_result);
// jcc(Assembler::equal, exit);
// movl(dst, greater_result);
// exit:

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// FABS acts on TOS, so both operands are constrained to FPR1.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// FCHS acts on TOS, so both operands are constrained to FPR1.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  // NOTE(review): format text says FSTP_S but the encoding pops to a
  // register (Pop_Reg_FPR), i.e. no single-precision store happens; the
  // format string is cosmetic/debug-only — confirm before "fixing" it.
  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float modulus via the FPU FPREM loop (same shape as modD_reg above,
// but with 4-byte spill slots and MOVSS/FLD_S).
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is not already TOS, load it to TOS and pop; otherwise a
    // non-popping store keeps the FPU stack balanced.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSD2SI returns 0x80000000 for NaN/overflow; only then take the
    // slow path through the d2i stub with the original double on the FPU.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // FISTP stores min_jlong (0x80000000:00000000) on NaN/overflow; only
    // that sentinel takes the slow path through the d2l stub.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
%}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // 0x80000000 is the CVTTSS2SI NaN/overflow sentinel; the shared
    // d2i stub handles the corner cases.
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// Int->float on the FPU when the input is known to be a byte (the matched
// subtree is src & 255): the value fits in 24 bits of mantissa, so no
// rounding store is required even in 24-bit mode.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Alternate int->float when UseXmmI2F: move to XMM, then integer-vector
// convert.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy to both halves, then arithmetic-shift the
// high half to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long->double on the FPU (UseSSE<=1): push both halves, FILD the 64-bit
// value, round by storing to the stack-slot destination.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Long->double into XMM (UseSSE>=2): convert on the FPU, round through
// memory, then load into the XMM register.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Long->float into XMM (UseSSE>=1): same FPU round-trip, single precision.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Long->float to a stack slot (FPU path), rounding via FSTP_S.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long->int: just copy the low half of the register pair.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// MoveF2I/MoveI2F/MoveD2L/MoveL2D are raw bit moves between FP and integer
// representations; no conversion is performed.

// Reinterpret float bits (already in a stack slot) as an int.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Store FPU float to a stack slot so its bits can be read as an int.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM float to a stack slot (bit move).
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Direct XMM->GPR bit move (cheapest form, UseSSE>=2).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store int bits to a stack slot typed as float.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// Load int bits from a stack slot onto the FPU stack as a float.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load int bits from a stack slot into an XMM register.
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Direct GPR->XMM bit move (UseSSE>=2).
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Load double bits from a stack slot into a long register pair (two 32-bit
// loads).
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// Store FPU double to a stack slot typed as long (bit move, UseSSE<=1).
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM double to a stack slot typed as long.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// XMM double -> long register pair without touching memory: extract the low
// 32 bits, swap words within the XMM temp, extract the high 32 bits.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);  // bring high dword low
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store long register pair to a stack slot typed as double (two 32-bit
// stores).
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// Load long bits from a stack slot onto the FPU stack as a double.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// Load long bits from a stack slot into an XMM register with MOVSD (clears
// the upper half; selected when UseXmmLoadAndClearUpper).
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Same load for CPUs where the partial-register MOVLPD form is preferred
// (!UseXmmLoadAndClearUpper); the emitted helper picks the form.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Long register pair -> XMM double without memory: move both halves into
// XMM registers and interleave them.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
// Small-or-unknown-size variant; actual code is emitted by
// MacroAssembler::clear_mem (last arg false = not known-large).
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

// Known-large ClearArray variant: skips the short-array test and goes
// straight to the bulk path (clear_mem last arg true).
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true);
  %}
  ins_pipe( pipe_slow );
%}

// String.compareTo intrinsic, both operands Latin-1 (LL encoding).
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// String.compareTo intrinsic, both operands UTF-16 (UU encoding).
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// String.compareTo intrinsic, Latin-1 vs UTF-16 (LU encoding).
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}

// UTF-16 vs Latin-1 (UL): note the swapped register bindings — the encode
// passes str2/cnt2 first so the helper can reuse the LU logic with the
// operands exchanged.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}

  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// Latin-1 variant; the constant substring length picks between the
// no-stack-copy C8 helper and the general helper.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// UTF-16 variant; threshold is 8 chars (same 16 bytes as the LL case).
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// Mixed-encoding (UL) indexOf with constant substring size.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length indexOf variants: substring length passed in a register,
// -1 tells the helper the length is not a compile-time constant.

// Latin-1 indexOf, variable substring length.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// UTF-16 indexOf, variable substring length.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Mixed-encoding (UL) indexOf, variable substring length.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// StringUTF16.indexOf(char) intrinsic: find a single char in a char[].
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // first arg true = array form (lengths read from the array headers).
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// char[] variant of array equals (last arg true = char elements).
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// StringCoding.hasNegatives intrinsic: test a byte[] for any byte < 0.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions

// Register-register compare; sets the condition-code register.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Register-immediate compare; uses the short imm8 form when possible.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero as TEST reg,reg (shorter than CMP reg,0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & con) compared to zero folds into a single TEST reg,imm.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & mem) compared to zero folds into TEST reg,mem.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  // Only match when the loaded pointer needs no relocation (raw pointer).
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // PF set also takes the branch (not-equal case)
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // PF set must skip the equal branch
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant used when only the flags result is consumed (compare vs NULL);
// $result (EDI) is clobbered but not defined.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // High halves compared signed...
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // ...low halves compared unsigned (below/equal).
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  // Sign of the high half alone decides LT/GE vs zero.
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  // Parenthesize the BoolTest disjunction so the UseSSE guard applies to both
  // tests.  Without parentheses, '&&' binds tighter than '||' and the 'ge'
  // case matched regardless of UseSSE (unlike the integer cmov rules above,
  // which already parenthesize).
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  // See cmovDDPR_reg_LTGE: parentheses keep the UseSSE guard on both tests.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  // See cmovDDPR_reg_LTGE: parentheses keep the UseSSE guard on both tests.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  // See cmovDDPR_reg_LTGE: parentheses keep the UseSSE guard on both tests.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  // OR of both halves is zero iff the whole long is zero.
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  // Compare low halves; only if they are equal does the high-half compare run.
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  // OR of both halves is zero iff the whole unsigned long is zero.
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  // Compare low halves; only if they are equal does the high-half compare run.
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);  // two CMOVcc instructions, one per 32-bit half
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  // Parenthesize the BoolTest disjunction so the UseSSE guard applies to both
  // tests.  Without parentheses, '&&' binds tighter than '||' and the 'ne'
  // case matched regardless of UseSSE (unlike the integer cmov rules above,
  // which already parenthesize).
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // See cmovDDPR_reg_EQNE: parentheses keep the UseSSE guard on both tests.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  // See cmovDDPR_reg_EQNE: parentheses keep the UseSSE guard on both tests.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // See cmovDDPR_reg_EQNE: parentheses keep the UseSSE guard on both tests.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  // Computes 0 - src so the flags reflect the commuted (zero cmp src) test.
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only match when the Bool test is gt or le; flags were set with commuted
  // operands, so cmpOp_commute emits the reversed condition code.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  // Computes 0 - src so the flags reflect the commuted (zero cmp src) test.
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);  // two CMOVcc instructions, one per 32-bit half
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  // Parenthesize the BoolTest disjunction so the UseSSE guard applies to both
  // tests.  Without parentheses, '&&' binds tighter than '||' and the 'gt'
  // case matched regardless of UseSSE (unlike the integer cmov rules above,
  // which already parenthesize).
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // See cmovDDPR_reg_LEGT: parentheses keep the UseSSE guard on both tests.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  // See cmovDDPR_reg_LEGT: parentheses keep the UseSSE guard on both tests.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // See cmovDDPR_reg_LEGT: parentheses keep the UseSSE guard on both tests.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // EAX is pre-loaded with a sentinel inline-cache value before the call.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  // No FPU bookkeeping: callee is known not to touch the float stack.
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  predicate(SafepointMechanism::uses_global_page_poll());
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}

instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  predicate(SafepointMechanism::uses_thread_local_poll());
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    // The poll must encode as the 2-byte TEST EAX,[reg] form (0x85 /r) for
    // the size(2) claim above to hold.
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceeded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Replace a reload of a just-stored value with a reuse of the stored register.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.