//
// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

// Note: the encoding column (3, 1, 6, ...) is the hardware register number
// that ends up in the ModRM/SIB bytes emitted by the encoder below.
reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
// Each x87 register is 80 bits wide; the allocator models it as two 32-bit
// halves (L = low word, used alone for floats; L+H pair for doubles).
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs.
// Pairs are listed low-half first; they must match the Long-pair layout
// (EDX:EAX, EBX:ECX, EDI:EBP) described for alloc_class chunk0 above.
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
// Relocation formats used when emitting 32-bit immediates/displacements
// that refer to relocatable addresses.
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

// Standard HotSpot shorthand: '__ foo()' expands to '_masm.foo()', so each
// function using it must have a MacroAssembler named _masm in scope.
#define __ _masm.

// How to find the high register of a Long pair, given the low register
// (pairs are laid out two allocator slots apart; see alloc_class chunk0).
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Writes the 128-bit value (lo, hi) at the first 16-byte-aligned address at
// or below 'adr' and returns that aligned address. The caller must supply a
// buffer with at least 16 bytes of slack (see fp_signmask_pool below).
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pointer aims at a 16-byte-aligned mask inside fp_signmask_pool:
// sign-mask = all bits except sign (for Abs), sign-flip = sign bit only (for Neg).
static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call instruction to restore
// FPU/AVX state: 6 for FLDCW when the method runs in 24-bit FP mode, plus
// 3 for VZEROUPPER when the CPU supports it.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  // 5-byte MOV (inline-cache setup) + 5-byte CALL
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size in bytes of the FFree_Float_Stack_All stub call sequence; recorded
// when that code is first emitted (-1 until then, checked by the assert below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1;                      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
331 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { 332 current_offset += pre_call_resets_size(); // skip fldcw, if any 333 current_offset += 5; // skip MOV instruction 334 current_offset += 1; // skip call opcode byte 335 return align_up(current_offset, alignment_required()) - current_offset; 336 } 337 338 // EMIT_RM() 339 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) { 340 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3); 341 cbuf.insts()->emit_int8(c); 342 } 343 344 // EMIT_CC() 345 void emit_cc(CodeBuffer &cbuf, int f1, int f2) { 346 unsigned char c = (unsigned char)( f1 | f2 ); 347 cbuf.insts()->emit_int8(c); 348 } 349 350 // EMIT_OPCODE() 351 void emit_opcode(CodeBuffer &cbuf, int code) { 352 cbuf.insts()->emit_int8((unsigned char) code); 353 } 354 355 // EMIT_OPCODE() w/ relocation information 356 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) { 357 cbuf.relocate(cbuf.insts_mark() + offset, reloc); 358 emit_opcode(cbuf, code); 359 } 360 361 // EMIT_D8() 362 void emit_d8(CodeBuffer &cbuf, int d8) { 363 cbuf.insts()->emit_int8((unsigned char) d8); 364 } 365 366 // EMIT_D16() 367 void emit_d16(CodeBuffer &cbuf, int d16) { 368 cbuf.insts()->emit_int16(d16); 369 } 370 371 // EMIT_D32() 372 void emit_d32(CodeBuffer &cbuf, int d32) { 373 cbuf.insts()->emit_int32(d32); 374 } 375 376 // emit 32 bit value and construct relocation entry from relocInfo::relocType 377 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc, 378 int format) { 379 cbuf.relocate(cbuf.insts_mark(), reloc, format); 380 cbuf.insts()->emit_int32(d32); 381 } 382 383 // emit 32 bit value and construct relocation entry from RelocationHolder 384 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec, 385 int format) { 386 #ifdef ASSERT 387 if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) { 388 
assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code"); 389 } 390 #endif 391 cbuf.relocate(cbuf.insts_mark(), rspec, format); 392 cbuf.insts()->emit_int32(d32); 393 } 394 395 // Access stack slot for load or store 396 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) { 397 emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src]) 398 if( -128 <= disp && disp <= 127 ) { 399 emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte 400 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 401 emit_d8 (cbuf, disp); // Displacement // R/M byte 402 } else { 403 emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte 404 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 405 emit_d32(cbuf, disp); // Displacement // R/M byte 406 } 407 } 408 409 // rRegI ereg, memory mem) %{ // emit_reg_mem 410 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) { 411 // There is no index & no scale, use form without SIB byte 412 if ((index == 0x4) && 413 (scale == 0) && (base != ESP_enc)) { 414 // If no displacement, mode is 0x0; unless base is [EBP] 415 if ( (displace == 0) && (base != EBP_enc) ) { 416 emit_rm(cbuf, 0x0, reg_encoding, base); 417 } 418 else { // If 8-bit displacement, mode 0x1 419 if ((displace >= -128) && (displace <= 127) 420 && (disp_reloc == relocInfo::none) ) { 421 emit_rm(cbuf, 0x1, reg_encoding, base); 422 emit_d8(cbuf, displace); 423 } 424 else { // If 32-bit displacement 425 if (base == -1) { // Special flag for absolute address 426 emit_rm(cbuf, 0x0, reg_encoding, 0x5); 427 // (manual lies; no SIB needed here) 428 if ( disp_reloc != relocInfo::none ) { 429 emit_d32_reloc(cbuf, displace, disp_reloc, 1); 430 } else { 431 emit_d32 (cbuf, displace); 432 } 433 } 434 else { // Normal base + offset 435 emit_rm(cbuf, 0x2, reg_encoding, base); 436 if ( disp_reloc != relocInfo::none ) { 437 emit_d32_reloc(cbuf, displace, disp_reloc, 1); 438 } else { 
439 emit_d32 (cbuf, displace); 440 } 441 } 442 } 443 } 444 } 445 else { // Else, encode with the SIB byte 446 // If no displacement, mode is 0x0; unless base is [EBP] 447 if (displace == 0 && (base != EBP_enc)) { // If no displacement 448 emit_rm(cbuf, 0x0, reg_encoding, 0x4); 449 emit_rm(cbuf, scale, index, base); 450 } 451 else { // If 8-bit displacement, mode 0x1 452 if ((displace >= -128) && (displace <= 127) 453 && (disp_reloc == relocInfo::none) ) { 454 emit_rm(cbuf, 0x1, reg_encoding, 0x4); 455 emit_rm(cbuf, scale, index, base); 456 emit_d8(cbuf, displace); 457 } 458 else { // If 32-bit displacement 459 if (base == 0x04 ) { 460 emit_rm(cbuf, 0x2, reg_encoding, 0x4); 461 emit_rm(cbuf, scale, index, 0x04); 462 } else { 463 emit_rm(cbuf, 0x2, reg_encoding, 0x4); 464 emit_rm(cbuf, scale, index, base); 465 } 466 if ( disp_reloc != relocInfo::none ) { 467 emit_d32_reloc(cbuf, displace, disp_reloc, 1); 468 } else { 469 emit_d32 (cbuf, displace); 470 } 471 } 472 } 473 } 474 } 475 476 477 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { 478 if( dst_encoding == src_encoding ) { 479 // reg-reg copy, use an empty encoding 480 } else { 481 emit_opcode( cbuf, 0x8B ); 482 emit_rm(cbuf, 0x3, dst_encoding, src_encoding ); 483 } 484 } 485 486 void emit_cmpfp_fixup(MacroAssembler& _masm) { 487 Label exit; 488 __ jccb(Assembler::noParity, exit); 489 __ pushf(); 490 // 491 // comiss/ucomiss instructions set ZF,PF,CF flags and 492 // zero OF,AF,SF for NaN values. 493 // Fixup flags by zeroing ZF,PF so that compare of NaN 494 // values returns 'less than' result (CF is set). 495 // Leave the rest of flags unchanged. 
496 // 497 // 7 6 5 4 3 2 1 0 498 // |S|Z|r|A|r|P|r|C| (r - reserved bit) 499 // 0 0 1 0 1 0 1 1 (0x2B) 500 // 501 __ andl(Address(rsp, 0), 0xffffff2b); 502 __ popf(); 503 __ bind(exit); 504 } 505 506 void emit_cmpfp3(MacroAssembler& _masm, Register dst) { 507 Label done; 508 __ movl(dst, -1); 509 __ jcc(Assembler::parity, done); 510 __ jcc(Assembler::below, done); 511 __ setb(Assembler::notEqual, dst); 512 __ movzbl(dst, dst); 513 __ bind(done); 514 } 515 516 517 //============================================================================= 518 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; 519 520 int ConstantTable::calculate_table_base_offset() const { 521 return 0; // absolute addressing, no offset 522 } 523 524 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } 525 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) { 526 ShouldNotReachHere(); 527 } 528 529 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { 530 // Empty encoding 531 } 532 533 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { 534 return 0; 535 } 536 537 #ifndef PRODUCT 538 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { 539 st->print("# MachConstantBaseNode (empty encoding)"); 540 } 541 #endif 542 543 544 //============================================================================= 545 #ifndef PRODUCT 546 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { 547 Compile* C = ra_->C; 548 549 int framesize = C->output()->frame_size_in_bytes(); 550 int bangsize = C->output()->bang_size_in_bytes(); 551 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 552 // Remove wordSize for return addr which is already pushed. 
553 framesize -= wordSize; 554 555 if (C->output()->need_stack_bang(bangsize)) { 556 framesize -= wordSize; 557 st->print("# stack bang (%d bytes)", bangsize); 558 st->print("\n\t"); 559 st->print("PUSH EBP\t# Save EBP"); 560 if (PreserveFramePointer) { 561 st->print("\n\t"); 562 st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); 563 } 564 if (framesize) { 565 st->print("\n\t"); 566 st->print("SUB ESP, #%d\t# Create frame",framesize); 567 } 568 } else { 569 st->print("SUB ESP, #%d\t# Create frame",framesize); 570 st->print("\n\t"); 571 framesize -= wordSize; 572 st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize); 573 if (PreserveFramePointer) { 574 st->print("\n\t"); 575 st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); 576 if (framesize > 0) { 577 st->print("\n\t"); 578 st->print("ADD EBP, #%d", framesize); 579 } 580 } 581 } 582 583 if (VerifyStackAtCalls) { 584 st->print("\n\t"); 585 framesize -= wordSize; 586 st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize); 587 } 588 589 if( C->in_24_bit_fp_mode() ) { 590 st->print("\n\t"); 591 st->print("FLDCW \t# load 24 bit fpu control word"); 592 } 593 if (UseSSE >= 2 && VerifyFPU) { 594 st->print("\n\t"); 595 st->print("# verify FPU stack (must be clean on entry)"); 596 } 597 598 #ifdef ASSERT 599 if (VerifyStackAtCalls) { 600 st->print("\n\t"); 601 st->print("# stack alignment check"); 602 } 603 #endif 604 st->cr(); 605 } 606 #endif 607 608 609 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 610 Compile* C = ra_->C; 611 MacroAssembler _masm(&cbuf); 612 613 int framesize = C->output()->frame_size_in_bytes(); 614 int bangsize = C->output()->bang_size_in_bytes(); 615 616 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL); 617 618 C->output()->set_frame_complete(cbuf.insts_size()); 619 620 if (C->has_mach_constant_base_node()) { 621 // NOTE: We set the table 
base offset here because users might be 622 // emitted before MachConstantBaseNode. 623 ConstantTable& constant_table = C->output()->constant_table(); 624 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); 625 } 626 } 627 628 uint MachPrologNode::size(PhaseRegAlloc *ra_) const { 629 return MachNode::size(ra_); // too many variables; just compute it the hard way 630 } 631 632 int MachPrologNode::reloc() const { 633 return 0; // a large enough number 634 } 635 636 //============================================================================= 637 #ifndef PRODUCT 638 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 639 Compile *C = ra_->C; 640 int framesize = C->output()->frame_size_in_bytes(); 641 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 642 // Remove two words for return addr and rbp, 643 framesize -= 2*wordSize; 644 645 if (C->max_vector_size() > 16) { 646 st->print("VZEROUPPER"); 647 st->cr(); st->print("\t"); 648 } 649 if (C->in_24_bit_fp_mode()) { 650 st->print("FLDCW standard control word"); 651 st->cr(); st->print("\t"); 652 } 653 if (framesize) { 654 st->print("ADD ESP,%d\t# Destroy frame",framesize); 655 st->cr(); st->print("\t"); 656 } 657 st->print_cr("POPL EBP"); st->print("\t"); 658 if (do_polling() && C->is_method_compilation()) { 659 st->print("TEST PollPage,EAX\t! Poll Safepoint"); 660 st->cr(); st->print("\t"); 661 } 662 } 663 #endif 664 665 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 666 Compile *C = ra_->C; 667 MacroAssembler _masm(&cbuf); 668 669 if (C->max_vector_size() > 16) { 670 // Clear upper bits of YMM registers when current compiled code uses 671 // wide vectors to avoid AVX <-> SSE transition penalty during call. 
672 _masm.vzeroupper(); 673 } 674 // If method set FPU control word, restore to standard control word 675 if (C->in_24_bit_fp_mode()) { 676 _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 677 } 678 679 int framesize = C->output()->frame_size_in_bytes(); 680 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 681 // Remove two words for return addr and rbp, 682 framesize -= 2*wordSize; 683 684 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here 685 686 if (framesize >= 128) { 687 emit_opcode(cbuf, 0x81); // add SP, #framesize 688 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 689 emit_d32(cbuf, framesize); 690 } else if (framesize) { 691 emit_opcode(cbuf, 0x83); // add SP, #framesize 692 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 693 emit_d8(cbuf, framesize); 694 } 695 696 emit_opcode(cbuf, 0x58 | EBP_enc); 697 698 if (StackReservedPages > 0 && C->has_reserved_stack_access()) { 699 __ reserved_stack_check(); 700 } 701 702 if (do_polling() && C->is_method_compilation()) { 703 Register pollReg = as_Register(EBX_enc); 704 MacroAssembler masm(&cbuf); 705 masm.get_thread(pollReg); 706 masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset()))); 707 masm.relocate(relocInfo::poll_return_type); 708 masm.testl(rax, Address(pollReg, 0)); 709 } 710 } 711 712 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { 713 return MachNode::size(ra_); // too many variables; just compute it 714 // the hard way 715 } 716 717 int MachEpilogNode::reloc() const { 718 return 0; // a large enough number 719 } 720 721 const Pipeline * MachEpilogNode::pipeline() const { 722 return MachNode::pipeline_class(); 723 } 724 725 //============================================================================= 726 727 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack }; 728 static enum RC rc_class( OptoReg::Name reg ) { 729 730 if( !OptoReg::is_valid(reg) ) return rc_bad; 731 if (OptoReg::is_stack(reg)) return 
rc_stack; 732 733 VMReg r = OptoReg::as_VMReg(reg); 734 if (r->is_Register()) return rc_int; 735 if (r->is_FloatRegister()) { 736 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 737 return rc_float; 738 } 739 assert(r->is_XMMRegister(), "must be"); 740 return rc_xmm; 741 } 742 743 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, 744 int opcode, const char *op_str, int size, outputStream* st ) { 745 if( cbuf ) { 746 emit_opcode (*cbuf, opcode ); 747 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 748 #ifndef PRODUCT 749 } else if( !do_size ) { 750 if( size != 0 ) st->print("\n\t"); 751 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 752 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 753 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 754 } else { // FLD, FST, PUSH, POP 755 st->print("%s [ESP + #%d]",op_str,offset); 756 } 757 #endif 758 } 759 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 760 return size+3+offset_size; 761 } 762 763 // Helper for XMM registers. Extra opcode bits, limited syntax. 764 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, 765 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { 766 int in_size_in_bits = Assembler::EVEX_32bit; 767 int evex_encoding = 0; 768 if (reg_lo+1 == reg_hi) { 769 in_size_in_bits = Assembler::EVEX_64bit; 770 evex_encoding = Assembler::VEX_W; 771 } 772 if (cbuf) { 773 MacroAssembler _masm(cbuf); 774 // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations, 775 // it maps more cases to single byte displacement 776 _masm.set_managed(); 777 if (reg_lo+1 == reg_hi) { // double move? 
778 if (is_load) { 779 __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); 780 } else { 781 __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); 782 } 783 } else { 784 if (is_load) { 785 __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); 786 } else { 787 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); 788 } 789 } 790 #ifndef PRODUCT 791 } else if (!do_size) { 792 if (size != 0) st->print("\n\t"); 793 if (reg_lo+1 == reg_hi) { // double move? 794 if (is_load) st->print("%s %s,[ESP + #%d]", 795 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", 796 Matcher::regName[reg_lo], offset); 797 else st->print("MOVSD [ESP + #%d],%s", 798 offset, Matcher::regName[reg_lo]); 799 } else { 800 if (is_load) st->print("MOVSS %s,[ESP + #%d]", 801 Matcher::regName[reg_lo], offset); 802 else st->print("MOVSS [ESP + #%d],%s", 803 offset, Matcher::regName[reg_lo]); 804 } 805 #endif 806 } 807 bool is_single_byte = false; 808 if ((UseAVX > 2) && (offset != 0)) { 809 is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding); 810 } 811 int offset_size = 0; 812 if (UseAVX > 2 ) { 813 offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 814 } else { 815 offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 816 } 817 size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX 818 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 819 return size+5+offset_size; 820 } 821 822 823 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 824 int src_hi, int dst_hi, int size, outputStream* st ) { 825 if (cbuf) { 826 MacroAssembler _masm(cbuf); 827 // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way. 
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// GPR -> XMM move (32-bit): movdl from an integer register into the low
// 32 bits of an XMM register.  Same cbuf/do_size protocol as impl_helper.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  // movdl takes 4 bytes with the SIMD/VEX prefix, 6 with the EVEX prefix.
  return (UseAVX> 2) ? 6 : 4;
}


// XMM -> GPR move (32-bit): movdl from the low 32 bits of an XMM register
// into an integer register.  Same cbuf/do_size protocol as impl_helper.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register MOV (opcode 0x8B): 2 bytes (opcode + ModRM).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 FP register to [ESP + offset].  If the source is not already
// the top of the FP stack (FPR1L), FLD a copy first and store with a popping
// FSTP; otherwise use a non-popping FST.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;  // FLD ST(i) is 2 bytes
  }
  // Choose store-and-pop (FSTP) if we FLD'd a copy above, otherwise a plain
  // store (FST).  st_op feeds impl_helper's ModRM reg field: EBX_num (3)
  // selects the popping form, EDX_num (2) the non-popping form.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);

// Vector stack-to-stack copy.  VecS/VecD are moved with push/pop pairs;
// VecX/VecY/VecZ bounce through xmm0, saving and restoring xmm0 in a scratch
// slot below ESP.  Returns the size in bytes; when emitting, the actual size
// is asserted against the up-front calculation.
static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
                                     int dst_offset, uint ireg, outputStream* st) {
  int calc_size = 0;
  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
  switch (ireg) {
  case Op_VecS:
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    break;
  case Op_VecD: {
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    // Second push/pop pair works on the high 4 bytes, so its displacement
    // (and hence encoded size) is computed at offset+4.
    int tmp_src_offset = src_offset + 4;
    int tmp_dst_offset = dst_offset + 4;
    src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
    dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
    calc_size += 3+src_offset_size + 3+dst_offset_size;
    break;
  }
  case Op_VecX:
  case Op_VecY:
  case Op_VecZ:
    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
    break;
  default:
    ShouldNotReachHere();
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX:
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
    assert(size == calc_size, "incorrect size calculation");
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      // NOTE(review): printout says "popq" but the emitted instruction above
      // is popl (32-bit) — debug text only; confirm intended.
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE(review): printout says "vmovdqu" but the emitted instruction is
      // evmovdquq — debug text only; confirm intended.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  return calc_size;
}

// Central spill-copy routine: emit (cbuf != NULL), print (cbuf == NULL &&
// !do_size), or size (otherwise) the move for this MachSpillCopyNode,
// dispatching on the register classes of source and destination.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // Vector spill copies are handled entirely by the shared vec_* helpers.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high word first so the low-word push below does not read a
      // slot the preceding pop already overwrote.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP are 2 bytes each; a lone FST is 2 bytes.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // FLD is 3 bytes + displacement; the trailing FSTP ST(i) is 2 bytes.
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // FP store to the freshly reserved 8-byte temp at [ESP+0]...
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
// Debug printout: run implementation() in print mode (cbuf == NULL, !do_size).
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

// Emit the spill copy into the code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Materialize the address of the on-stack lock box: LEA reg,[ESP+offset].
// Uses the disp32 form for offsets >= 128, the disp8 form otherwise
// (sizes must agree with BoxLockNode::size below).
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// Size of the LEA emitted above: 7 bytes with a disp32, 4 with a disp8.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: inline-cache check.  Compares the cached klass in
// EAX against the receiver's klass (loaded from ECX) and jumps to the
// IC-miss stub on mismatch; trailing NOPs pad the verified entry point so it
// can be patched.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Fixed size of the UEP sequence; must match what emit() produces (asserted there).
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}

// This is UltraSparc specific, true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On 86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Needs 2 CMOV's for longs.
// NOTE(review): the returned cost is 1 although the comment says two CMOVs
// are needed — confirm this is the intended extra cost in the cost model.
const int Matcher::long_cmove_cost() { return 1; }

// No CMOVF/CMOVD with SSE/SSE2
const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Compressed-pointer queries below unconditionally ShouldNotCallThis():
// they are not expected to be reached on this platform.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_oop_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_klass_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Platform hook: replace the memory operand feeding input 'idx' of 'node'
// with its *_win95_safe operand-class variant.  Walks the node's operand
// list, counting the input edges each operand consumes, to locate the
// operand that owns input 'idx'; then swaps it for the safe equivalent
// (operand classes are defined elsewhere in this file).
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped = node->oper_input_base();   // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  while( idx >= skipped+num_edges ) { // idx belongs to a later operand
    skipped += num_edges;
    opcnt++;                      // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// True for (AndL x con) where con's high word is zero, and for a ConL
// constant whose high word is zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.  MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (0x66).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // ModRM byte for a reg,reg form (mod == 0x3).
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Explicit opcode followed by a reg,reg ModRM byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32, 0 — zero a register with an immediate move.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    // Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    // normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);  // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);  // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF);  // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                      // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1704 // Check for 8-bit immediate, and set sign extend bit in opcode 1705 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1706 emit_opcode(cbuf, $primary | 0x02); } 1707 else { // If 32-bit immediate 1708 emit_opcode(cbuf, $primary); 1709 } 1710 // Emit r/m byte with secondary opcode, after primary opcode. 1711 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1712 %} 1713 1714 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1715 // Check for 8-bit immediate, and set sign extend bit in opcode 1716 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1717 $$$emit8$imm$$constant; 1718 } 1719 else { // If 32-bit immediate 1720 // Output immediate 1721 $$$emit32$imm$$constant; 1722 } 1723 %} 1724 1725 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1726 // Emit primary opcode and set sign-extend bit 1727 // Check for 8-bit immediate, and set sign extend bit in opcode 1728 int con = (int)$imm$$constant; // Throw away top bits 1729 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1730 // Emit r/m byte with secondary opcode, after primary opcode. 1731 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1732 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1733 else emit_d32(cbuf,con); 1734 %} 1735 1736 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1737 // Emit primary opcode and set sign-extend bit 1738 // Check for 8-bit immediate, and set sign extend bit in opcode 1739 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1740 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1741 // Emit r/m byte with tertiary opcode, after primary opcode. 
1742 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1743 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1744 else emit_d32(cbuf,con); 1745 %} 1746 1747 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1748 emit_cc(cbuf, $secondary, $dst$$reg ); 1749 %} 1750 1751 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1752 int destlo = $dst$$reg; 1753 int desthi = HIGH_FROM_LOW(destlo); 1754 // bswap lo 1755 emit_opcode(cbuf, 0x0F); 1756 emit_cc(cbuf, 0xC8, destlo); 1757 // bswap hi 1758 emit_opcode(cbuf, 0x0F); 1759 emit_cc(cbuf, 0xC8, desthi); 1760 // xchg lo and hi 1761 emit_opcode(cbuf, 0x87); 1762 emit_rm(cbuf, 0x3, destlo, desthi); 1763 %} 1764 1765 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1766 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1767 %} 1768 1769 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1770 $$$emit8$primary; 1771 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1772 %} 1773 1774 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1775 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1776 emit_d8(cbuf, op >> 8 ); 1777 emit_d8(cbuf, op & 255); 1778 %} 1779 1780 // emulate a CMOV with a conditional branch around a MOV 1781 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1782 // Invert sense of branch from sense of CMOV 1783 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1784 emit_d8( cbuf, $brOffs$$constant ); 1785 %} 1786 1787 enc_class enc_PartialSubtypeCheck( ) %{ 1788 Register Redi = as_Register(EDI_enc); // result register 1789 Register Reax = as_Register(EAX_enc); // super class 1790 Register Recx = as_Register(ECX_enc); // killed 1791 Register Resi = as_Register(ESI_enc); // sub class 1792 Label miss; 1793 1794 MacroAssembler _masm(&cbuf); 1795 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1796 NULL, &miss, 1797 /*set_cond_codes:*/ true); 1798 if ($primary) { 1799 __ xorptr(Redi, Redi); 1800 } 1801 __ bind(miss); 1802 %} 1803 1804 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1805 
MacroAssembler masm(&cbuf); 1806 int start = masm.offset(); 1807 if (UseSSE >= 2) { 1808 if (VerifyFPU) { 1809 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1810 } 1811 } else { 1812 // External c_calling_convention expects the FPU stack to be 'clean'. 1813 // Compiled code leaves it dirty. Do cleanup now. 1814 masm.empty_FPU_stack(); 1815 } 1816 if (sizeof_FFree_Float_Stack_All == -1) { 1817 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1818 } else { 1819 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1820 } 1821 %} 1822 1823 enc_class Verify_FPU_For_Leaf %{ 1824 if( VerifyFPU ) { 1825 MacroAssembler masm(&cbuf); 1826 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1827 } 1828 %} 1829 1830 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1831 // This is the instruction starting address for relocation info. 1832 cbuf.set_insts_mark(); 1833 $$$emit8$primary; 1834 // CALL directly to the runtime 1835 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1836 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1837 1838 if (UseSSE >= 2) { 1839 MacroAssembler _masm(&cbuf); 1840 BasicType rt = tf()->return_type(); 1841 1842 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1843 // A C runtime call where the return value is unused. In SSE2+ 1844 // mode the result needs to be removed from the FPU stack. It's 1845 // likely that this function call could be removed by the 1846 // optimizer if the C function is a pure function. 
1847 __ ffree(0); 1848 } else if (rt == T_FLOAT) { 1849 __ lea(rsp, Address(rsp, -4)); 1850 __ fstp_s(Address(rsp, 0)); 1851 __ movflt(xmm0, Address(rsp, 0)); 1852 __ lea(rsp, Address(rsp, 4)); 1853 } else if (rt == T_DOUBLE) { 1854 __ lea(rsp, Address(rsp, -8)); 1855 __ fstp_d(Address(rsp, 0)); 1856 __ movdbl(xmm0, Address(rsp, 0)); 1857 __ lea(rsp, Address(rsp, 8)); 1858 } 1859 } 1860 %} 1861 1862 enc_class pre_call_resets %{ 1863 // If method sets FPU control word restore it here 1864 debug_only(int off0 = cbuf.insts_size()); 1865 if (ra_->C->in_24_bit_fp_mode()) { 1866 MacroAssembler _masm(&cbuf); 1867 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1868 } 1869 // Clear upper bits of YMM registers when current compiled code uses 1870 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1871 MacroAssembler _masm(&cbuf); 1872 __ vzeroupper(); 1873 debug_only(int off1 = cbuf.insts_size()); 1874 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1875 %} 1876 1877 enc_class post_call_FPU %{ 1878 // If method sets FPU control word do it here also 1879 if (Compile::current()->in_24_bit_fp_mode()) { 1880 MacroAssembler masm(&cbuf); 1881 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1882 } 1883 %} 1884 1885 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1886 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1887 // who we intended to call. 1888 cbuf.set_insts_mark(); 1889 $$$emit8$primary; 1890 1891 if (!_method) { 1892 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1893 runtime_call_Relocation::spec(), 1894 RELOC_IMM32); 1895 } else { 1896 int method_index = resolved_method_index(cbuf); 1897 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1898 : static_call_Relocation::spec(method_index); 1899 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1900 rspec, RELOC_DISP32); 1901 // Emit stubs for static call. 1902 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1903 if (stub == NULL) { 1904 ciEnv::current()->record_failure("CodeCache is full"); 1905 return; 1906 } 1907 } 1908 %} 1909 1910 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1911 MacroAssembler _masm(&cbuf); 1912 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1913 %} 1914 1915 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1916 int disp = in_bytes(Method::from_compiled_offset()); 1917 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1918 1919 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1920 cbuf.set_insts_mark(); 1921 $$$emit8$primary; 1922 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1923 emit_d8(cbuf, disp); // Displacement 1924 1925 %} 1926 1927 // Following encoding is no longer used, but may be restored if calling 1928 // convention changes significantly. 
1929 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1930 // 1931 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1932 // // int ic_reg = Matcher::inline_cache_reg(); 1933 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1934 // // int imo_reg = Matcher::interpreter_method_oop_reg(); 1935 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1936 // 1937 // // // Interpreter expects method_oop in EBX, currently a callee-saved register, 1938 // // // so we load it immediately before the call 1939 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop 1940 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1941 // 1942 // // xor rbp,ebp 1943 // emit_opcode(cbuf, 0x33); 1944 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1945 // 1946 // // CALL to interpreter. 1947 // cbuf.set_insts_mark(); 1948 // $$$emit8$primary; 1949 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1950 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1951 // %} 1952 1953 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1954 $$$emit8$primary; 1955 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1956 $$$emit8$shift$$constant; 1957 %} 1958 1959 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1960 // Load immediate does not have a zero or sign extended version 1961 // for 8-bit immediates 1962 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1963 $$$emit32$src$$constant; 1964 %} 1965 1966 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1967 // Load immediate does not have a zero or sign extended version 1968 // for 8-bit immediates 1969 emit_opcode(cbuf, $primary + $dst$$reg); 1970 $$$emit32$src$$constant; 1971 %} 1972 1973 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1974 // Load immediate does not have a zero or sign extended version 1975 // for 8-bit immediates 1976 int dst_enc = $dst$$reg; 1977 int src_con = $src$$constant & 0x0FFFFFFFFL; 1978 if (src_con == 0) { 1979 // xor dst, dst 
1980 emit_opcode(cbuf, 0x33); 1981 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1982 } else { 1983 emit_opcode(cbuf, $primary + dst_enc); 1984 emit_d32(cbuf, src_con); 1985 } 1986 %} 1987 1988 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 1989 // Load immediate does not have a zero or sign extended version 1990 // for 8-bit immediates 1991 int dst_enc = $dst$$reg + 2; 1992 int src_con = ((julong)($src$$constant)) >> 32; 1993 if (src_con == 0) { 1994 // xor dst, dst 1995 emit_opcode(cbuf, 0x33); 1996 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1997 } else { 1998 emit_opcode(cbuf, $primary + dst_enc); 1999 emit_d32(cbuf, src_con); 2000 } 2001 %} 2002 2003 2004 // Encode a reg-reg copy. If it is useless, then empty encoding. 2005 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 2006 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2007 %} 2008 2009 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 2010 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2011 %} 2012 2013 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 2014 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2015 %} 2016 2017 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 2018 $$$emit8$primary; 2019 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2020 %} 2021 2022 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 2023 $$$emit8$secondary; 2024 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2025 %} 2026 2027 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 2028 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2029 %} 2030 2031 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 2032 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2033 %} 2034 2035 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 2036 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); 2037 %} 2038 2039 enc_class Con32 (immI src) %{ // Con32(storeImmI) 2040 // Output immediate 2041 $$$emit32$src$$constant; 2042 %} 2043 2044 enc_class Con32FPR_as_bits(immFPR src) 
%{ // storeF_imm 2045 // Output Float immediate bits 2046 jfloat jf = $src$$constant; 2047 int jf_as_bits = jint_cast( jf ); 2048 emit_d32(cbuf, jf_as_bits); 2049 %} 2050 2051 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 2052 // Output Float immediate bits 2053 jfloat jf = $src$$constant; 2054 int jf_as_bits = jint_cast( jf ); 2055 emit_d32(cbuf, jf_as_bits); 2056 %} 2057 2058 enc_class Con16 (immI src) %{ // Con16(storeImmI) 2059 // Output immediate 2060 $$$emit16$src$$constant; 2061 %} 2062 2063 enc_class Con_d32(immI src) %{ 2064 emit_d32(cbuf,$src$$constant); 2065 %} 2066 2067 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2068 // Output immediate memory reference 2069 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2070 emit_d32(cbuf, 0x00); 2071 %} 2072 2073 enc_class lock_prefix( ) %{ 2074 emit_opcode(cbuf,0xF0); // [Lock] 2075 %} 2076 2077 // Cmp-xchg long value. 2078 // Note: we need to swap rbx, and rcx before and after the 2079 // cmpxchg8 instruction because the instruction uses 2080 // rcx as the high order word of the new value to store but 2081 // our register encoding uses rbx,. 
2082 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ 2083 2084 // XCHG rbx,ecx 2085 emit_opcode(cbuf,0x87); 2086 emit_opcode(cbuf,0xD9); 2087 // [Lock] 2088 emit_opcode(cbuf,0xF0); 2089 // CMPXCHG8 [Eptr] 2090 emit_opcode(cbuf,0x0F); 2091 emit_opcode(cbuf,0xC7); 2092 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2093 // XCHG rbx,ecx 2094 emit_opcode(cbuf,0x87); 2095 emit_opcode(cbuf,0xD9); 2096 %} 2097 2098 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ 2099 // [Lock] 2100 emit_opcode(cbuf,0xF0); 2101 2102 // CMPXCHG [Eptr] 2103 emit_opcode(cbuf,0x0F); 2104 emit_opcode(cbuf,0xB1); 2105 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2106 %} 2107 2108 enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{ 2109 // [Lock] 2110 emit_opcode(cbuf,0xF0); 2111 2112 // CMPXCHGB [Eptr] 2113 emit_opcode(cbuf,0x0F); 2114 emit_opcode(cbuf,0xB0); 2115 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2116 %} 2117 2118 enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{ 2119 // [Lock] 2120 emit_opcode(cbuf,0xF0); 2121 2122 // 16-bit mode 2123 emit_opcode(cbuf, 0x66); 2124 2125 // CMPXCHGW [Eptr] 2126 emit_opcode(cbuf,0x0F); 2127 emit_opcode(cbuf,0xB1); 2128 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2129 %} 2130 2131 enc_class enc_flags_ne_to_boolean( iRegI res ) %{ 2132 int res_encoding = $res$$reg; 2133 2134 // MOV res,0 2135 emit_opcode( cbuf, 0xB8 + res_encoding); 2136 emit_d32( cbuf, 0 ); 2137 // JNE,s fail 2138 emit_opcode(cbuf,0x75); 2139 emit_d8(cbuf, 5 ); 2140 // MOV res,1 2141 emit_opcode( cbuf, 0xB8 + res_encoding); 2142 emit_d32( cbuf, 1 ); 2143 // fail: 2144 %} 2145 2146 enc_class set_instruction_start( ) %{ 2147 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2148 %} 2149 2150 enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem 2151 int reg_encoding = $ereg$$reg; 2152 int base = $mem$$base; 2153 int index = $mem$$index; 2154 int scale = $mem$$scale; 2155 int displace = $mem$$disp; 2156 relocInfo::relocType disp_reloc = $mem->disp_reloc(); 2157 encode_RegMem(cbuf, reg_encoding, base, 
index, scale, displace, disp_reloc); 2158 %} 2159 2160 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem 2161 int reg_encoding = HIGH_FROM_LOW($ereg$$reg); // Hi register of pair, computed from lo 2162 int base = $mem$$base; 2163 int index = $mem$$index; 2164 int scale = $mem$$scale; 2165 int displace = $mem$$disp + 4; // Offset is 4 further in memory 2166 assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" ); 2167 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none); 2168 %} 2169 2170 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ 2171 int r1, r2; 2172 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 2173 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 2174 emit_opcode(cbuf,0x0F); 2175 emit_opcode(cbuf,$tertiary); 2176 emit_rm(cbuf, 0x3, r1, r2); 2177 emit_d8(cbuf,$cnt$$constant); 2178 emit_d8(cbuf,$primary); 2179 emit_rm(cbuf, 0x3, $secondary, r1); 2180 emit_d8(cbuf,$cnt$$constant); 2181 %} 2182 2183 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ 2184 emit_opcode( cbuf, 0x8B ); // Move 2185 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2186 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2187 emit_d8(cbuf,$primary); 2188 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 2189 emit_d8(cbuf,$cnt$$constant-32); 2190 } 2191 emit_d8(cbuf,$primary); 2192 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg)); 2193 emit_d8(cbuf,31); 2194 %} 2195 2196 enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{ 2197 int r1, r2; 2198 if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 2199 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 2200 2201 emit_opcode( cbuf, 0x8B ); // Move r1,r2 2202 emit_rm(cbuf, 0x3, r1, r2); 2203 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2204 emit_opcode(cbuf,$primary); 2205 emit_rm(cbuf, 0x3, $secondary, r1); 2206 emit_d8(cbuf,$cnt$$constant-32); 2207 } 2208 
emit_opcode(cbuf,0x33); // XOR r2,r2 2209 emit_rm(cbuf, 0x3, r2, r2); 2210 %} 2211 2212 // Clone of RegMem but accepts an extra parameter to access each 2213 // half of a double in memory; it never needs relocation info. 2214 enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{ 2215 emit_opcode(cbuf,$opcode$$constant); 2216 int reg_encoding = $rm_reg$$reg; 2217 int base = $mem$$base; 2218 int index = $mem$$index; 2219 int scale = $mem$$scale; 2220 int displace = $mem$$disp + $disp_for_half$$constant; 2221 relocInfo::relocType disp_reloc = relocInfo::none; 2222 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2223 %} 2224 2225 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!! 2226 // 2227 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant 2228 // and it never needs relocation information. 2229 // Frequently used to move data between FPU's Stack Top and memory. 
2230 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ 2231 int rm_byte_opcode = $rm_opcode$$constant; 2232 int base = $mem$$base; 2233 int index = $mem$$index; 2234 int scale = $mem$$scale; 2235 int displace = $mem$$disp; 2236 assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" ); 2237 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none); 2238 %} 2239 2240 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ 2241 int rm_byte_opcode = $rm_opcode$$constant; 2242 int base = $mem$$base; 2243 int index = $mem$$index; 2244 int scale = $mem$$scale; 2245 int displace = $mem$$disp; 2246 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2247 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 2248 %} 2249 2250 enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea 2251 int reg_encoding = $dst$$reg; 2252 int base = $src0$$reg; // 0xFFFFFFFF indicates no base 2253 int index = 0x04; // 0x04 indicates no index 2254 int scale = 0x00; // 0x00 indicates no scale 2255 int displace = $src1$$constant; // 0x00 indicates no displacement 2256 relocInfo::relocType disp_reloc = relocInfo::none; 2257 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2258 %} 2259 2260 enc_class min_enc (rRegI dst, rRegI src) %{ // MIN 2261 // Compare dst,src 2262 emit_opcode(cbuf,0x3B); 2263 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2264 // jmp dst < src around move 2265 emit_opcode(cbuf,0x7C); 2266 emit_d8(cbuf,2); 2267 // move dst,src 2268 emit_opcode(cbuf,0x8B); 2269 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2270 %} 2271 2272 enc_class max_enc (rRegI dst, rRegI src) %{ // MAX 2273 // Compare dst,src 2274 emit_opcode(cbuf,0x3B); 2275 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2276 // jmp dst > src around move 2277 emit_opcode(cbuf,0x7F); 2278 emit_d8(cbuf,2); 2279 // move dst,src 2280 emit_opcode(cbuf,0x8B); 
2281 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2282 %} 2283 2284 enc_class enc_FPR_store(memory mem, regDPR src) %{ 2285 // If src is FPR1, we can just FST to store it. 2286 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 2287 int reg_encoding = 0x2; // Just store 2288 int base = $mem$$base; 2289 int index = $mem$$index; 2290 int scale = $mem$$scale; 2291 int displace = $mem$$disp; 2292 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2293 if( $src$$reg != FPR1L_enc ) { 2294 reg_encoding = 0x3; // Store & pop 2295 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it) 2296 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2297 } 2298 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2299 emit_opcode(cbuf,$primary); 2300 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2301 %} 2302 2303 enc_class neg_reg(rRegI dst) %{ 2304 // NEG $dst 2305 emit_opcode(cbuf,0xF7); 2306 emit_rm(cbuf, 0x3, 0x03, $dst$$reg ); 2307 %} 2308 2309 enc_class setLT_reg(eCXRegI dst) %{ 2310 // SETLT $dst 2311 emit_opcode(cbuf,0x0F); 2312 emit_opcode(cbuf,0x9C); 2313 emit_rm( cbuf, 0x3, 0x4, $dst$$reg ); 2314 %} 2315 2316 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT 2317 int tmpReg = $tmp$$reg; 2318 2319 // SUB $p,$q 2320 emit_opcode(cbuf,0x2B); 2321 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg); 2322 // SBB $tmp,$tmp 2323 emit_opcode(cbuf,0x1B); 2324 emit_rm(cbuf, 0x3, tmpReg, tmpReg); 2325 // AND $tmp,$y 2326 emit_opcode(cbuf,0x23); 2327 emit_rm(cbuf, 0x3, tmpReg, $y$$reg); 2328 // ADD $p,$tmp 2329 emit_opcode(cbuf,0x03); 2330 emit_rm(cbuf, 0x3, $p$$reg, tmpReg); 2331 %} 2332 2333 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ 2334 // TEST shift,32 2335 emit_opcode(cbuf,0xF7); 2336 emit_rm(cbuf, 0x3, 0, ECX_enc); 2337 emit_d32(cbuf,0x20); 2338 // JEQ,s small 2339 emit_opcode(cbuf, 0x74); 2340 emit_d8(cbuf, 0x04); 2341 // MOV $dst.hi,$dst.lo 2342 
emit_opcode( cbuf, 0x8B ); 2343 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2344 // CLR $dst.lo 2345 emit_opcode(cbuf, 0x33); 2346 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); 2347 // small: 2348 // SHLD $dst.hi,$dst.lo,$shift 2349 emit_opcode(cbuf,0x0F); 2350 emit_opcode(cbuf,0xA5); 2351 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2352 // SHL $dst.lo,$shift" 2353 emit_opcode(cbuf,0xD3); 2354 emit_rm(cbuf, 0x3, 0x4, $dst$$reg ); 2355 %} 2356 2357 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ 2358 // TEST shift,32 2359 emit_opcode(cbuf,0xF7); 2360 emit_rm(cbuf, 0x3, 0, ECX_enc); 2361 emit_d32(cbuf,0x20); 2362 // JEQ,s small 2363 emit_opcode(cbuf, 0x74); 2364 emit_d8(cbuf, 0x04); 2365 // MOV $dst.lo,$dst.hi 2366 emit_opcode( cbuf, 0x8B ); 2367 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2368 // CLR $dst.hi 2369 emit_opcode(cbuf, 0x33); 2370 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg)); 2371 // small: 2372 // SHRD $dst.lo,$dst.hi,$shift 2373 emit_opcode(cbuf,0x0F); 2374 emit_opcode(cbuf,0xAD); 2375 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2376 // SHR $dst.hi,$shift" 2377 emit_opcode(cbuf,0xD3); 2378 emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) ); 2379 %} 2380 2381 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ 2382 // TEST shift,32 2383 emit_opcode(cbuf,0xF7); 2384 emit_rm(cbuf, 0x3, 0, ECX_enc); 2385 emit_d32(cbuf,0x20); 2386 // JEQ,s small 2387 emit_opcode(cbuf, 0x74); 2388 emit_d8(cbuf, 0x05); 2389 // MOV $dst.lo,$dst.hi 2390 emit_opcode( cbuf, 0x8B ); 2391 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2392 // SAR $dst.hi,31 2393 emit_opcode(cbuf, 0xC1); 2394 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) ); 2395 emit_d8(cbuf, 0x1F ); 2396 // small: 2397 // SHRD $dst.lo,$dst.hi,$shift 2398 emit_opcode(cbuf,0x0F); 2399 emit_opcode(cbuf,0xAD); 2400 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2401 // SAR $dst.hi,$shift" 2402 
emit_opcode(cbuf,0xD3); 2403 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) ); 2404 %} 2405 2406 2407 // ----------------- Encodings for floating point unit ----------------- 2408 // May leave result in FPU-TOS or FPU reg depending on opcodes 2409 enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV 2410 $$$emit8$primary; 2411 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 2412 %} 2413 2414 // Pop argument in FPR0 with FSTP ST(0) 2415 enc_class PopFPU() %{ 2416 emit_opcode( cbuf, 0xDD ); 2417 emit_d8( cbuf, 0xD8 ); 2418 %} 2419 2420 // !!!!! equivalent to Pop_Reg_F 2421 enc_class Pop_Reg_DPR( regDPR dst ) %{ 2422 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2423 emit_d8( cbuf, 0xD8+$dst$$reg ); 2424 %} 2425 2426 enc_class Push_Reg_DPR( regDPR dst ) %{ 2427 emit_opcode( cbuf, 0xD9 ); 2428 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2429 %} 2430 2431 enc_class strictfp_bias1( regDPR dst ) %{ 2432 emit_opcode( cbuf, 0xDB ); // FLD m80real 2433 emit_opcode( cbuf, 0x2D ); 2434 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); 2435 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2436 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2437 %} 2438 2439 enc_class strictfp_bias2( regDPR dst ) %{ 2440 emit_opcode( cbuf, 0xDB ); // FLD m80real 2441 emit_opcode( cbuf, 0x2D ); 2442 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); 2443 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2444 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2445 %} 2446 2447 // Special case for moving an integer register to a stack slot. 2448 enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2449 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp ); 2450 %} 2451 2452 // Special case for moving a register to a stack slot. 
2453 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2454 // Opcode already emitted 2455 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2456 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2457 emit_d32(cbuf, $dst$$disp); // Displacement 2458 %} 2459 2460 // Push the integer in stackSlot 'src' onto FP-stack 2461 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2462 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2463 %} 2464 2465 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2466 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2467 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2468 %} 2469 2470 // Same as Pop_Mem_F except for opcode 2471 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2472 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2473 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2474 %} 2475 2476 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2477 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2478 emit_d8( cbuf, 0xD8+$dst$$reg ); 2479 %} 2480 2481 enc_class Push_Reg_FPR( regFPR dst ) %{ 2482 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2483 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2484 %} 2485 2486 // Push FPU's float to a stack-slot, and pop FPU-stack 2487 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2488 int pop = 0x02; 2489 if ($src$$reg != FPR1L_enc) { 2490 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2491 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2492 pop = 0x03; 2493 } 2494 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2495 %} 2496 2497 // Push FPU's double to a stack-slot, and pop FPU-stack 2498 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2499 int pop = 0x02; 2500 if ($src$$reg != FPR1L_enc) { 2501 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2502 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2503 pop = 0x03; 2504 } 2505 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2506 %} 2507 2508 // Push FPU's double to 
// a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  // Load dst onto the FPU stack, then (if src is not already FPR1)
  // exchange src with FPR1 using the fincstp/FXCH/fdecstp trick so the
  // top-of-stack bookkeeping stays consistent.
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Spill two XMM doubles through a stack temp and push both onto the
  // FPU stack (src1 first, so src0 ends up on top).
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Float variant of Push_ModD_encoding (4-byte stack temp).
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the FPU result through the stack temp into an XMM register
  // and release the 8-byte temp slot.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Float variant; the amount of stack to release is passed in as $d8.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Push a single XMM double onto the FPU stack via an 8-byte stack temp.
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Reserve an 8-byte scratch slot on the stack.
  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  // Release the 8-byte scratch slot.
  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Store an XMM double into the (already reserved) stack temp and load
  // it onto the FPU stack.
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // If the result is not already in FPR1, rotate it there with the
  // fincstp/FXCH/fdecstp sequence; the actual store is done by a
  // following Pop_Reg_F or Pop_Mem_F encoding.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy FPU status word to EFLAGS and skip the next 5 bytes when the
  // parity flag (unordered compare) is clear.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp  ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // Transfer FPU compare result to integer flags, forcing the
  // unordered (NaN) case to look like "less than" (carry set).
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32  ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  // fnstsw_ax();
  // sahf();
  // movl(dst, nan_result);
  // jcc(Assembler::parity, exit);
  // movl(dst, less_result);
  // jcc(Assembler::below, exit);
  // movl(dst, equal_result);
  // jcc(Assembler::equal, exit);
  // movl(dst, greater_result);

  // less_result     =  1;
  // greater_result  = -1;
  // equal_result    =  0;
  // nan_result      = -1;

  // Materialize the three-way FPU compare result (-1/0/1, NaN -> -1)
  // into an integer register; implements the pseudo-code above.
  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend an int into a long register pair.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  // Push a long onto the CPU stack and FILD it onto the FPU stack,
  // then restore the CPU stack pointer.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add  SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  // IMUL into EDX:EAX, then arithmetic-shift EDX right by (cnt-32)
  // to extract the high bits of the product.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}

  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}

  // Push both long operands and call SharedRuntime::ldiv; the callee
  // leaves the quotient in EDX:EAX.  Caller pops the four arg words.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Same call sequence as long_div but targets SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Set Z flag iff the whole long is zero: OR the halves into tmp.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  // Equality-only long compare: compare lo, and only if equal compare hi.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}

  // Signed long compare via CMP lo / SBB hi into a temp register.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB    $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}

  // Long compare against zero: 0 - src via XOR/CMP/SBB.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}

  // Sniff, sniff... smells like Gnu Superoptimizer
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  // POP EDX (0x5A).
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}

  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);         // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);         // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);         // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);         // FLDCW std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Double-to-long: same round-to-zero trick as DPR2I_encoding but with
  // a 64-bit FISTP; the 0x8000000000000000L sentinel routes to the
  // d2l_wrapper slow path.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);         // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);         // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);         // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);         // FLDCW std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x5A);       // POP EDX
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);     // Size of slow_call
    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADDP  ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
      // FSUB   ST,$src1
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xE0 + $src1$$reg);

      // FDIV
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by      +--------+
//    CALLEE       | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // NOTE(review): for Op_RegF this tests UseSSE>=1 while c_return_value
  // tests UseSSE>=2 — Java and C float-return conventions differ here.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: fits in a 32-bit signed immediate
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register excluding EAX
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding EAX and EDX
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

operand ncxRegI() %{
constraint(ALLOC_IN_RC(ncx_reg)); 3769 match(RegI); 3770 match(eAXRegI); 3771 match(eDXRegI); 3772 match(eSIRegI); 3773 match(eDIRegI); 3774 3775 format %{ %} 3776 interface(REG_INTER); 3777 %} 3778 3779 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3780 // // 3781 operand eSIRegI(xRegI reg) %{ 3782 constraint(ALLOC_IN_RC(esi_reg)); 3783 match(reg); 3784 match(rRegI); 3785 3786 format %{ "ESI" %} 3787 interface(REG_INTER); 3788 %} 3789 3790 // Pointer Register 3791 operand anyRegP() %{ 3792 constraint(ALLOC_IN_RC(any_reg)); 3793 match(RegP); 3794 match(eAXRegP); 3795 match(eBXRegP); 3796 match(eCXRegP); 3797 match(eDIRegP); 3798 match(eRegP); 3799 3800 format %{ %} 3801 interface(REG_INTER); 3802 %} 3803 3804 operand eRegP() %{ 3805 constraint(ALLOC_IN_RC(int_reg)); 3806 match(RegP); 3807 match(eAXRegP); 3808 match(eBXRegP); 3809 match(eCXRegP); 3810 match(eDIRegP); 3811 3812 format %{ %} 3813 interface(REG_INTER); 3814 %} 3815 3816 // On windows95, EBP is not safe to use for implicit null tests. 
3817 operand eRegP_no_EBP() %{ 3818 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3819 match(RegP); 3820 match(eAXRegP); 3821 match(eBXRegP); 3822 match(eCXRegP); 3823 match(eDIRegP); 3824 3825 op_cost(100); 3826 format %{ %} 3827 interface(REG_INTER); 3828 %} 3829 3830 operand naxRegP() %{ 3831 constraint(ALLOC_IN_RC(nax_reg)); 3832 match(RegP); 3833 match(eBXRegP); 3834 match(eDXRegP); 3835 match(eCXRegP); 3836 match(eSIRegP); 3837 match(eDIRegP); 3838 3839 format %{ %} 3840 interface(REG_INTER); 3841 %} 3842 3843 operand nabxRegP() %{ 3844 constraint(ALLOC_IN_RC(nabx_reg)); 3845 match(RegP); 3846 match(eCXRegP); 3847 match(eDXRegP); 3848 match(eSIRegP); 3849 match(eDIRegP); 3850 3851 format %{ %} 3852 interface(REG_INTER); 3853 %} 3854 3855 operand pRegP() %{ 3856 constraint(ALLOC_IN_RC(p_reg)); 3857 match(RegP); 3858 match(eBXRegP); 3859 match(eDXRegP); 3860 match(eSIRegP); 3861 match(eDIRegP); 3862 3863 format %{ %} 3864 interface(REG_INTER); 3865 %} 3866 3867 // Special Registers 3868 // Return a pointer value 3869 operand eAXRegP(eRegP reg) %{ 3870 constraint(ALLOC_IN_RC(eax_reg)); 3871 match(reg); 3872 format %{ "EAX" %} 3873 interface(REG_INTER); 3874 %} 3875 3876 // Used in AtomicAdd 3877 operand eBXRegP(eRegP reg) %{ 3878 constraint(ALLOC_IN_RC(ebx_reg)); 3879 match(reg); 3880 format %{ "EBX" %} 3881 interface(REG_INTER); 3882 %} 3883 3884 // Tail-call (interprocedural jump) to interpreter 3885 operand eCXRegP(eRegP reg) %{ 3886 constraint(ALLOC_IN_RC(ecx_reg)); 3887 match(reg); 3888 format %{ "ECX" %} 3889 interface(REG_INTER); 3890 %} 3891 3892 operand eDXRegP(eRegP reg) %{ 3893 constraint(ALLOC_IN_RC(edx_reg)); 3894 match(reg); 3895 format %{ "EDX" %} 3896 interface(REG_INTER); 3897 %} 3898 3899 operand eSIRegP(eRegP reg) %{ 3900 constraint(ALLOC_IN_RC(esi_reg)); 3901 match(reg); 3902 format %{ "ESI" %} 3903 interface(REG_INTER); 3904 %} 3905 3906 // Used in rep stosw 3907 operand eDIRegP(eRegP reg) %{ 3908 constraint(ALLOC_IN_RC(edi_reg)); 3909 
match(reg); 3910 format %{ "EDI" %} 3911 interface(REG_INTER); 3912 %} 3913 3914 operand eRegL() %{ 3915 constraint(ALLOC_IN_RC(long_reg)); 3916 match(RegL); 3917 match(eADXRegL); 3918 3919 format %{ %} 3920 interface(REG_INTER); 3921 %} 3922 3923 operand eADXRegL( eRegL reg ) %{ 3924 constraint(ALLOC_IN_RC(eadx_reg)); 3925 match(reg); 3926 3927 format %{ "EDX:EAX" %} 3928 interface(REG_INTER); 3929 %} 3930 3931 operand eBCXRegL( eRegL reg ) %{ 3932 constraint(ALLOC_IN_RC(ebcx_reg)); 3933 match(reg); 3934 3935 format %{ "EBX:ECX" %} 3936 interface(REG_INTER); 3937 %} 3938 3939 // Special case for integer high multiply 3940 operand eADXRegL_low_only() %{ 3941 constraint(ALLOC_IN_RC(eadx_reg)); 3942 match(RegL); 3943 3944 format %{ "EAX" %} 3945 interface(REG_INTER); 3946 %} 3947 3948 // Flags register, used as output of compare instructions 3949 operand eFlagsReg() %{ 3950 constraint(ALLOC_IN_RC(int_flags)); 3951 match(RegFlags); 3952 3953 format %{ "EFLAGS" %} 3954 interface(REG_INTER); 3955 %} 3956 3957 // Flags register, used as output of FLOATING POINT compare instructions 3958 operand eFlagsRegU() %{ 3959 constraint(ALLOC_IN_RC(int_flags)); 3960 match(RegFlags); 3961 3962 format %{ "EFLAGS_U" %} 3963 interface(REG_INTER); 3964 %} 3965 3966 operand eFlagsRegUCF() %{ 3967 constraint(ALLOC_IN_RC(int_flags)); 3968 match(RegFlags); 3969 predicate(false); 3970 3971 format %{ "EFLAGS_U_CF" %} 3972 interface(REG_INTER); 3973 %} 3974 3975 // Condition Code Register used by long compare 3976 operand flagsReg_long_LTGE() %{ 3977 constraint(ALLOC_IN_RC(int_flags)); 3978 match(RegFlags); 3979 format %{ "FLAGS_LTGE" %} 3980 interface(REG_INTER); 3981 %} 3982 operand flagsReg_long_EQNE() %{ 3983 constraint(ALLOC_IN_RC(int_flags)); 3984 match(RegFlags); 3985 format %{ "FLAGS_EQNE" %} 3986 interface(REG_INTER); 3987 %} 3988 operand flagsReg_long_LEGT() %{ 3989 constraint(ALLOC_IN_RC(int_flags)); 3990 match(RegFlags); 3991 format %{ "FLAGS_LEGT" %} 3992 interface(REG_INTER); 
3993 %} 3994 3995 // Condition Code Register used by unsigned long compare 3996 operand flagsReg_ulong_LTGE() %{ 3997 constraint(ALLOC_IN_RC(int_flags)); 3998 match(RegFlags); 3999 format %{ "FLAGS_U_LTGE" %} 4000 interface(REG_INTER); 4001 %} 4002 operand flagsReg_ulong_EQNE() %{ 4003 constraint(ALLOC_IN_RC(int_flags)); 4004 match(RegFlags); 4005 format %{ "FLAGS_U_EQNE" %} 4006 interface(REG_INTER); 4007 %} 4008 operand flagsReg_ulong_LEGT() %{ 4009 constraint(ALLOC_IN_RC(int_flags)); 4010 match(RegFlags); 4011 format %{ "FLAGS_U_LEGT" %} 4012 interface(REG_INTER); 4013 %} 4014 4015 // Float register operands 4016 operand regDPR() %{ 4017 predicate( UseSSE < 2 ); 4018 constraint(ALLOC_IN_RC(fp_dbl_reg)); 4019 match(RegD); 4020 match(regDPR1); 4021 match(regDPR2); 4022 format %{ %} 4023 interface(REG_INTER); 4024 %} 4025 4026 operand regDPR1(regDPR reg) %{ 4027 predicate( UseSSE < 2 ); 4028 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4029 match(reg); 4030 format %{ "FPR1" %} 4031 interface(REG_INTER); 4032 %} 4033 4034 operand regDPR2(regDPR reg) %{ 4035 predicate( UseSSE < 2 ); 4036 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4037 match(reg); 4038 format %{ "FPR2" %} 4039 interface(REG_INTER); 4040 %} 4041 4042 operand regnotDPR1(regDPR reg) %{ 4043 predicate( UseSSE < 2 ); 4044 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4045 match(reg); 4046 format %{ %} 4047 interface(REG_INTER); 4048 %} 4049 4050 // Float register operands 4051 operand regFPR() %{ 4052 predicate( UseSSE < 2 ); 4053 constraint(ALLOC_IN_RC(fp_flt_reg)); 4054 match(RegF); 4055 match(regFPR1); 4056 format %{ %} 4057 interface(REG_INTER); 4058 %} 4059 4060 // Float register operands 4061 operand regFPR1(regFPR reg) %{ 4062 predicate( UseSSE < 2 ); 4063 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4064 match(reg); 4065 format %{ "FPR1" %} 4066 interface(REG_INTER); 4067 %} 4068 4069 // XMM Float register operands 4070 operand regF() %{ 4071 predicate( UseSSE>=1 ); 4072 constraint(ALLOC_IN_RC(float_reg_legacy)); 4073 
match(RegF); 4074 format %{ %} 4075 interface(REG_INTER); 4076 %} 4077 4078 // Float register operands 4079 operand vlRegF() %{ 4080 constraint(ALLOC_IN_RC(float_reg_vl)); 4081 match(RegF); 4082 4083 format %{ %} 4084 interface(REG_INTER); 4085 %} 4086 4087 // XMM Double register operands 4088 operand regD() %{ 4089 predicate( UseSSE>=2 ); 4090 constraint(ALLOC_IN_RC(double_reg_legacy)); 4091 match(RegD); 4092 format %{ %} 4093 interface(REG_INTER); 4094 %} 4095 4096 // Double register operands 4097 operand vlRegD() %{ 4098 constraint(ALLOC_IN_RC(double_reg_vl)); 4099 match(RegD); 4100 4101 format %{ %} 4102 interface(REG_INTER); 4103 %} 4104 4105 //----------Memory Operands---------------------------------------------------- 4106 // Direct Memory Operand 4107 operand direct(immP addr) %{ 4108 match(addr); 4109 4110 format %{ "[$addr]" %} 4111 interface(MEMORY_INTER) %{ 4112 base(0xFFFFFFFF); 4113 index(0x4); 4114 scale(0x0); 4115 disp($addr); 4116 %} 4117 %} 4118 4119 // Indirect Memory Operand 4120 operand indirect(eRegP reg) %{ 4121 constraint(ALLOC_IN_RC(int_reg)); 4122 match(reg); 4123 4124 format %{ "[$reg]" %} 4125 interface(MEMORY_INTER) %{ 4126 base($reg); 4127 index(0x4); 4128 scale(0x0); 4129 disp(0x0); 4130 %} 4131 %} 4132 4133 // Indirect Memory Plus Short Offset Operand 4134 operand indOffset8(eRegP reg, immI8 off) %{ 4135 match(AddP reg off); 4136 4137 format %{ "[$reg + $off]" %} 4138 interface(MEMORY_INTER) %{ 4139 base($reg); 4140 index(0x4); 4141 scale(0x0); 4142 disp($off); 4143 %} 4144 %} 4145 4146 // Indirect Memory Plus Long Offset Operand 4147 operand indOffset32(eRegP reg, immI off) %{ 4148 match(AddP reg off); 4149 4150 format %{ "[$reg + $off]" %} 4151 interface(MEMORY_INTER) %{ 4152 base($reg); 4153 index(0x4); 4154 scale(0x0); 4155 disp($off); 4156 %} 4157 %} 4158 4159 // Indirect Memory Plus Long Offset Operand 4160 operand indOffset32X(rRegI reg, immP off) %{ 4161 match(AddP off reg); 4162 4163 format %{ "[$reg + $off]" %} 4164 
interface(MEMORY_INTER) %{ 4165 base($reg); 4166 index(0x4); 4167 scale(0x0); 4168 disp($off); 4169 %} 4170 %} 4171 4172 // Indirect Memory Plus Index Register Plus Offset Operand 4173 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4174 match(AddP (AddP reg ireg) off); 4175 4176 op_cost(10); 4177 format %{"[$reg + $off + $ireg]" %} 4178 interface(MEMORY_INTER) %{ 4179 base($reg); 4180 index($ireg); 4181 scale(0x0); 4182 disp($off); 4183 %} 4184 %} 4185 4186 // Indirect Memory Plus Index Register Plus Offset Operand 4187 operand indIndex(eRegP reg, rRegI ireg) %{ 4188 match(AddP reg ireg); 4189 4190 op_cost(10); 4191 format %{"[$reg + $ireg]" %} 4192 interface(MEMORY_INTER) %{ 4193 base($reg); 4194 index($ireg); 4195 scale(0x0); 4196 disp(0x0); 4197 %} 4198 %} 4199 4200 // // ------------------------------------------------------------------------- 4201 // // 486 architecture doesn't support "scale * index + offset" with out a base 4202 // // ------------------------------------------------------------------------- 4203 // // Scaled Memory Operands 4204 // // Indirect Memory Times Scale Plus Offset Operand 4205 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4206 // match(AddP off (LShiftI ireg scale)); 4207 // 4208 // op_cost(10); 4209 // format %{"[$off + $ireg << $scale]" %} 4210 // interface(MEMORY_INTER) %{ 4211 // base(0x4); 4212 // index($ireg); 4213 // scale($scale); 4214 // disp($off); 4215 // %} 4216 // %} 4217 4218 // Indirect Memory Times Scale Plus Index Register 4219 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4220 match(AddP reg (LShiftI ireg scale)); 4221 4222 op_cost(10); 4223 format %{"[$reg + $ireg << $scale]" %} 4224 interface(MEMORY_INTER) %{ 4225 base($reg); 4226 index($ireg); 4227 scale($scale); 4228 disp(0x0); 4229 %} 4230 %} 4231 4232 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4233 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4234 
match(AddP (AddP reg (LShiftI ireg scale)) off); 4235 4236 op_cost(10); 4237 format %{"[$reg + $off + $ireg << $scale]" %} 4238 interface(MEMORY_INTER) %{ 4239 base($reg); 4240 index($ireg); 4241 scale($scale); 4242 disp($off); 4243 %} 4244 %} 4245 4246 //----------Load Long Memory Operands------------------------------------------ 4247 // The load-long idiom will use it's address expression again after loading 4248 // the first word of the long. If the load-long destination overlaps with 4249 // registers used in the addressing expression, the 2nd half will be loaded 4250 // from a clobbered address. Fix this by requiring that load-long use 4251 // address registers that do not overlap with the load-long target. 4252 4253 // load-long support 4254 operand load_long_RegP() %{ 4255 constraint(ALLOC_IN_RC(esi_reg)); 4256 match(RegP); 4257 match(eSIRegP); 4258 op_cost(100); 4259 format %{ %} 4260 interface(REG_INTER); 4261 %} 4262 4263 // Indirect Memory Operand Long 4264 operand load_long_indirect(load_long_RegP reg) %{ 4265 constraint(ALLOC_IN_RC(esi_reg)); 4266 match(reg); 4267 4268 format %{ "[$reg]" %} 4269 interface(MEMORY_INTER) %{ 4270 base($reg); 4271 index(0x4); 4272 scale(0x0); 4273 disp(0x0); 4274 %} 4275 %} 4276 4277 // Indirect Memory Plus Long Offset Operand 4278 operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4279 match(AddP reg off); 4280 4281 format %{ "[$reg + $off]" %} 4282 interface(MEMORY_INTER) %{ 4283 base($reg); 4284 index(0x4); 4285 scale(0x0); 4286 disp($off); 4287 %} 4288 %} 4289 4290 opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4291 4292 4293 //----------Special Memory Operands-------------------------------------------- 4294 // Stack Slot Operand - This operand is used for loading and storing temporary 4295 // values on the stack where a match requires a value to 4296 // flow through memory. 
4297 operand stackSlotP(sRegP reg) %{ 4298 constraint(ALLOC_IN_RC(stack_slots)); 4299 // No match rule because this operand is only generated in matching 4300 format %{ "[$reg]" %} 4301 interface(MEMORY_INTER) %{ 4302 base(0x4); // ESP 4303 index(0x4); // No Index 4304 scale(0x0); // No Scale 4305 disp($reg); // Stack Offset 4306 %} 4307 %} 4308 4309 operand stackSlotI(sRegI reg) %{ 4310 constraint(ALLOC_IN_RC(stack_slots)); 4311 // No match rule because this operand is only generated in matching 4312 format %{ "[$reg]" %} 4313 interface(MEMORY_INTER) %{ 4314 base(0x4); // ESP 4315 index(0x4); // No Index 4316 scale(0x0); // No Scale 4317 disp($reg); // Stack Offset 4318 %} 4319 %} 4320 4321 operand stackSlotF(sRegF reg) %{ 4322 constraint(ALLOC_IN_RC(stack_slots)); 4323 // No match rule because this operand is only generated in matching 4324 format %{ "[$reg]" %} 4325 interface(MEMORY_INTER) %{ 4326 base(0x4); // ESP 4327 index(0x4); // No Index 4328 scale(0x0); // No Scale 4329 disp($reg); // Stack Offset 4330 %} 4331 %} 4332 4333 operand stackSlotD(sRegD reg) %{ 4334 constraint(ALLOC_IN_RC(stack_slots)); 4335 // No match rule because this operand is only generated in matching 4336 format %{ "[$reg]" %} 4337 interface(MEMORY_INTER) %{ 4338 base(0x4); // ESP 4339 index(0x4); // No Index 4340 scale(0x0); // No Scale 4341 disp($reg); // Stack Offset 4342 %} 4343 %} 4344 4345 operand stackSlotL(sRegL reg) %{ 4346 constraint(ALLOC_IN_RC(stack_slots)); 4347 // No match rule because this operand is only generated in matching 4348 format %{ "[$reg]" %} 4349 interface(MEMORY_INTER) %{ 4350 base(0x4); // ESP 4351 index(0x4); // No Index 4352 scale(0x0); // No Scale 4353 disp($reg); // Stack Offset 4354 %} 4355 %} 4356 4357 //----------Memory Operands - Win95 Implicit Null Variants---------------- 4358 // Indirect Memory Operand 4359 operand indirect_win95_safe(eRegP_no_EBP reg) 4360 %{ 4361 constraint(ALLOC_IN_RC(int_reg)); 4362 match(reg); 4363 4364 op_cost(100); 4365 
format %{ "[$reg]" %} 4366 interface(MEMORY_INTER) %{ 4367 base($reg); 4368 index(0x4); 4369 scale(0x0); 4370 disp(0x0); 4371 %} 4372 %} 4373 4374 // Indirect Memory Plus Short Offset Operand 4375 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off) 4376 %{ 4377 match(AddP reg off); 4378 4379 op_cost(100); 4380 format %{ "[$reg + $off]" %} 4381 interface(MEMORY_INTER) %{ 4382 base($reg); 4383 index(0x4); 4384 scale(0x0); 4385 disp($off); 4386 %} 4387 %} 4388 4389 // Indirect Memory Plus Long Offset Operand 4390 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off) 4391 %{ 4392 match(AddP reg off); 4393 4394 op_cost(100); 4395 format %{ "[$reg + $off]" %} 4396 interface(MEMORY_INTER) %{ 4397 base($reg); 4398 index(0x4); 4399 scale(0x0); 4400 disp($off); 4401 %} 4402 %} 4403 4404 // Indirect Memory Plus Index Register Plus Offset Operand 4405 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off) 4406 %{ 4407 match(AddP (AddP reg ireg) off); 4408 4409 op_cost(100); 4410 format %{"[$reg + $off + $ireg]" %} 4411 interface(MEMORY_INTER) %{ 4412 base($reg); 4413 index($ireg); 4414 scale(0x0); 4415 disp($off); 4416 %} 4417 %} 4418 4419 // Indirect Memory Times Scale Plus Index Register 4420 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale) 4421 %{ 4422 match(AddP reg (LShiftI ireg scale)); 4423 4424 op_cost(100); 4425 format %{"[$reg + $ireg << $scale]" %} 4426 interface(MEMORY_INTER) %{ 4427 base($reg); 4428 index($ireg); 4429 scale($scale); 4430 disp(0x0); 4431 %} 4432 %} 4433 4434 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4435 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale) 4436 %{ 4437 match(AddP (AddP reg (LShiftI ireg scale)) off); 4438 4439 op_cost(100); 4440 format %{"[$reg + $off + $ireg << $scale]" %} 4441 interface(MEMORY_INTER) %{ 4442 base($reg); 4443 index($ireg); 4444 scale($scale); 4445 disp($off); 4446 %} 4447 %} 4448 4449 
//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed compare); encodings are the x86 Jcc condition codes
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
4483 operand cmpOpU() %{ 4484 match(Bool); 4485 4486 format %{ "" %} 4487 interface(COND_INTER) %{ 4488 equal(0x4, "e"); 4489 not_equal(0x5, "ne"); 4490 less(0x2, "b"); 4491 greater_equal(0x3, "nb"); 4492 less_equal(0x6, "be"); 4493 greater(0x7, "nbe"); 4494 overflow(0x0, "o"); 4495 no_overflow(0x1, "no"); 4496 %} 4497 %} 4498 4499 // Floating comparisons that don't require any fixup for the unordered case 4500 operand cmpOpUCF() %{ 4501 match(Bool); 4502 predicate(n->as_Bool()->_test._test == BoolTest::lt || 4503 n->as_Bool()->_test._test == BoolTest::ge || 4504 n->as_Bool()->_test._test == BoolTest::le || 4505 n->as_Bool()->_test._test == BoolTest::gt); 4506 format %{ "" %} 4507 interface(COND_INTER) %{ 4508 equal(0x4, "e"); 4509 not_equal(0x5, "ne"); 4510 less(0x2, "b"); 4511 greater_equal(0x3, "nb"); 4512 less_equal(0x6, "be"); 4513 greater(0x7, "nbe"); 4514 overflow(0x0, "o"); 4515 no_overflow(0x1, "no"); 4516 %} 4517 %} 4518 4519 4520 // Floating comparisons that can be fixed up with extra conditional jumps 4521 operand cmpOpUCF2() %{ 4522 match(Bool); 4523 predicate(n->as_Bool()->_test._test == BoolTest::ne || 4524 n->as_Bool()->_test._test == BoolTest::eq); 4525 format %{ "" %} 4526 interface(COND_INTER) %{ 4527 equal(0x4, "e"); 4528 not_equal(0x5, "ne"); 4529 less(0x2, "b"); 4530 greater_equal(0x3, "nb"); 4531 less_equal(0x6, "be"); 4532 greater(0x7, "nbe"); 4533 overflow(0x0, "o"); 4534 no_overflow(0x1, "no"); 4535 %} 4536 %} 4537 4538 // Comparison Code for FP conditional move 4539 operand cmpOp_fcmov() %{ 4540 match(Bool); 4541 4542 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 4543 n->as_Bool()->_test._test != BoolTest::no_overflow); 4544 format %{ "" %} 4545 interface(COND_INTER) %{ 4546 equal (0x0C8); 4547 not_equal (0x1C8); 4548 less (0x0C0); 4549 greater_equal(0x1C0); 4550 less_equal (0x0D0); 4551 greater (0x1D0); 4552 overflow(0x0, "o"); // not really supported by the instruction 4553 no_overflow(0x1, "no"); // not really supported 
by the instruction 4554 %} 4555 %} 4556 4557 // Comparison Code used in long compares 4558 operand cmpOp_commute() %{ 4559 match(Bool); 4560 4561 format %{ "" %} 4562 interface(COND_INTER) %{ 4563 equal(0x4, "e"); 4564 not_equal(0x5, "ne"); 4565 less(0xF, "g"); 4566 greater_equal(0xE, "le"); 4567 less_equal(0xD, "ge"); 4568 greater(0xC, "l"); 4569 overflow(0x0, "o"); 4570 no_overflow(0x1, "no"); 4571 %} 4572 %} 4573 4574 // Comparison Code used in unsigned long compares 4575 operand cmpOpU_commute() %{ 4576 match(Bool); 4577 4578 format %{ "" %} 4579 interface(COND_INTER) %{ 4580 equal(0x4, "e"); 4581 not_equal(0x5, "ne"); 4582 less(0x7, "nbe"); 4583 greater_equal(0x6, "be"); 4584 less_equal(0x3, "nb"); 4585 greater(0x2, "b"); 4586 overflow(0x0, "o"); 4587 no_overflow(0x1, "no"); 4588 %} 4589 %} 4590 4591 //----------OPERAND CLASSES---------------------------------------------------- 4592 // Operand Classes are groups of operands that are used as to simplify 4593 // instruction definitions by not requiring the AD writer to specify separate 4594 // instructions for every form of operand when the instruction accepts 4595 // multiple operand types with the same basic encoding and format. The classic 4596 // case of this is memory operands. 4597 4598 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, 4599 indIndex, indIndexScale, indIndexScaleOffset); 4600 4601 // Long memory operations are encoded in 2 instructions and a +4 offset. 4602 // This means some kind of offset is always required and you cannot use 4603 // an oop as the offset (done when working on static globals). 4604 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, 4605 indIndex, indIndexScale, indIndexScaleOffset); 4606 4607 4608 //----------PIPELINE----------------------------------------------------------- 4609 // Rules which define the behavior of the target architectures pipeline. 
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable-sized (x86)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
4653 4654 // Integer ALU reg operation 4655 pipe_class ialu_reg(rRegI dst) %{ 4656 single_instruction; 4657 dst : S4(write); 4658 dst : S3(read); 4659 DECODE : S0; // any decoder 4660 ALU : S3; // any alu 4661 %} 4662 4663 // Long ALU reg operation 4664 pipe_class ialu_reg_long(eRegL dst) %{ 4665 instruction_count(2); 4666 dst : S4(write); 4667 dst : S3(read); 4668 DECODE : S0(2); // any 2 decoders 4669 ALU : S3(2); // both alus 4670 %} 4671 4672 // Integer ALU reg operation using big decoder 4673 pipe_class ialu_reg_fat(rRegI dst) %{ 4674 single_instruction; 4675 dst : S4(write); 4676 dst : S3(read); 4677 D0 : S0; // big decoder only 4678 ALU : S3; // any alu 4679 %} 4680 4681 // Long ALU reg operation using big decoder 4682 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4683 instruction_count(2); 4684 dst : S4(write); 4685 dst : S3(read); 4686 D0 : S0(2); // big decoder only; twice 4687 ALU : S3(2); // any 2 alus 4688 %} 4689 4690 // Integer ALU reg-reg operation 4691 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4692 single_instruction; 4693 dst : S4(write); 4694 src : S3(read); 4695 DECODE : S0; // any decoder 4696 ALU : S3; // any alu 4697 %} 4698 4699 // Long ALU reg-reg operation 4700 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4701 instruction_count(2); 4702 dst : S4(write); 4703 src : S3(read); 4704 DECODE : S0(2); // any 2 decoders 4705 ALU : S3(2); // both alus 4706 %} 4707 4708 // Integer ALU reg-reg operation 4709 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4710 single_instruction; 4711 dst : S4(write); 4712 src : S3(read); 4713 D0 : S0; // big decoder only 4714 ALU : S3; // any alu 4715 %} 4716 4717 // Long ALU reg-reg operation 4718 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4719 instruction_count(2); 4720 dst : S4(write); 4721 src : S3(read); 4722 D0 : S0(2); // big decoder only; twice 4723 ALU : S3(2); // both alus 4724 %} 4725 4726 // Integer ALU reg-mem operation 4727 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4728 single_instruction; 4729 dst : S5(write); 4730 mem : S3(read); 4731 D0 : S0; // big decoder only 4732 ALU : S4; // any alu 4733 MEM : S3; // any mem 4734 %} 4735 4736 // Long ALU reg-mem operation 4737 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4738 instruction_count(2); 4739 dst : S5(write); 4740 mem : S3(read); 4741 D0 : S0(2); // big decoder only; twice 4742 ALU : S4(2); // any 2 alus 4743 MEM : S3(2); // both mems 4744 %} 4745 4746 // Integer mem operation (prefetch) 4747 pipe_class ialu_mem(memory mem) 4748 %{ 4749 single_instruction; 4750 mem : S3(read); 4751 D0 : S0; // big decoder only 4752 MEM : S3; // any mem 4753 %} 4754 4755 // Integer Store to Memory 4756 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4757 single_instruction; 4758 mem : S3(read); 4759 src : S5(read); 4760 D0 : S0; // big decoder only 4761 ALU : S4; // any alu 4762 MEM : S3; 4763 %} 4764 4765 // Long Store to Memory 4766 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4767 instruction_count(2); 4768 mem : S3(read); 4769 src : S5(read); 4770 D0 : S0(2); // big decoder only; twice 4771 ALU : S4(2); // any 2 alus 4772 MEM : S3(2); // Both mems 4773 %} 4774 4775 // Integer Store to Memory 4776 pipe_class ialu_mem_imm(memory mem) %{ 4777 single_instruction; 4778 mem : S3(read); 4779 D0 : S0; // big decoder only 4780 ALU : S4; // any alu 4781 MEM : S3; 4782 %} 4783 4784 // Integer ALU0 reg-reg operation 4785 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4786 single_instruction; 4787 dst : S4(write); 4788 src : S3(read); 4789 D0 : S0; // Big decoder only 4790 ALU0 : S3; // only alu0 4791 %} 4792 4793 // Integer ALU0 reg-mem operation 4794 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4795 single_instruction; 4796 dst : S5(write); 4797 mem : S3(read); 4798 D0 : S0; // big decoder only 4799 ALU0 : S4; // ALU0 only 4800 MEM : S3; // any mem 4801 %} 4802 4803 // Integer ALU reg-reg operation 4804 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4805 single_instruction; 4806 cr : S4(write); 4807 src1 : S3(read); 4808 src2 : S3(read); 4809 DECODE : S0; // any decoder 4810 ALU : S3; // any alu 4811 %} 4812 4813 // Integer ALU reg-imm operation 4814 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4815 single_instruction; 4816 cr : S4(write); 4817 src1 : S3(read); 4818 DECODE : S0; // any decoder 4819 ALU : S3; // any alu 4820 %} 4821 4822 // Integer ALU reg-mem operation 4823 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4824 single_instruction; 4825 cr : S4(write); 4826 src1 : S3(read); 4827 src2 : S3(read); 4828 D0 : S0; // big decoder only 4829 ALU : S4; // any alu 4830 MEM : S3; 4831 %} 4832 4833 // Conditional move reg-reg 4834 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4835 instruction_count(4); 4836 y : S4(read); 4837 q : S3(read); 4838 p : S3(read); 4839 DECODE : S0(4); // any decoder 4840 %} 4841 4842 // Conditional move reg-reg 4843 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4844 single_instruction; 4845 dst : S4(write); 4846 src : S3(read); 4847 cr : S3(read); 4848 DECODE : S0; // any decoder 4849 %} 4850 4851 // Conditional move reg-mem 4852 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4853 single_instruction; 4854 dst : S4(write); 4855 src : S3(read); 4856 cr : S3(read); 4857 DECODE : S0; // any decoder 4858 MEM : S3; 4859 %} 4860 4861 // Conditional move reg-reg long 4862 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4863 single_instruction; 4864 dst : S4(write); 4865 src : S3(read); 4866 cr : S3(read); 4867 DECODE : S0(2); // any 2 decoders 4868 %} 4869 4870 // Conditional move double reg-reg 4871 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4872 single_instruction; 4873 dst : S4(write); 4874 src : S3(read); 4875 cr : S3(read); 4876 DECODE : S0; // any decoder 4877 %} 4878 4879 // Float reg-reg operation 4880 pipe_class fpu_reg(regDPR 
dst) %{ 4881 instruction_count(2); 4882 dst : S3(read); 4883 DECODE : S0(2); // any 2 decoders 4884 FPU : S3; 4885 %} 4886 4887 // Float reg-reg operation 4888 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4889 instruction_count(2); 4890 dst : S4(write); 4891 src : S3(read); 4892 DECODE : S0(2); // any 2 decoders 4893 FPU : S3; 4894 %} 4895 4896 // Float reg-reg operation 4897 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4898 instruction_count(3); 4899 dst : S4(write); 4900 src1 : S3(read); 4901 src2 : S3(read); 4902 DECODE : S0(3); // any 3 decoders 4903 FPU : S3(2); 4904 %} 4905 4906 // Float reg-reg operation 4907 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 4908 instruction_count(4); 4909 dst : S4(write); 4910 src1 : S3(read); 4911 src2 : S3(read); 4912 src3 : S3(read); 4913 DECODE : S0(4); // any 4 decoders 4914 FPU : S3(2); 4915 %} 4916 4917 // Float reg-reg operation 4918 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 4919 instruction_count(4); 4920 dst : S4(write); 4921 src1 : S3(read); 4922 src2 : S3(read); 4923 src3 : S3(read); 4924 DECODE : S1(3); // any 3 decoders 4925 D0 : S0; // Big decoder only 4926 FPU : S3(2); 4927 MEM : S3; 4928 %} 4929 4930 // Float reg-mem operation 4931 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 4932 instruction_count(2); 4933 dst : S5(write); 4934 mem : S3(read); 4935 D0 : S0; // big decoder only 4936 DECODE : S1; // any decoder for FPU POP 4937 FPU : S4; 4938 MEM : S3; // any mem 4939 %} 4940 4941 // Float reg-mem operation 4942 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 4943 instruction_count(3); 4944 dst : S5(write); 4945 src1 : S3(read); 4946 mem : S3(read); 4947 D0 : S0; // big decoder only 4948 DECODE : S1(2); // any decoder for FPU POP 4949 FPU : S4; 4950 MEM : S3; // any mem 4951 %} 4952 4953 // Float mem-reg operation 4954 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 4955
instruction_count(2); 4956 src : S5(read); 4957 mem : S3(read); 4958 DECODE : S0; // any decoder for FPU PUSH 4959 D0 : S1; // big decoder only 4960 FPU : S4; 4961 MEM : S3; // any mem 4962 %} 4963 4964 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4965 instruction_count(3); 4966 src1 : S3(read); 4967 src2 : S3(read); 4968 mem : S3(read); 4969 DECODE : S0(2); // any decoder for FPU PUSH 4970 D0 : S1; // big decoder only 4971 FPU : S4; 4972 MEM : S3; // any mem 4973 %} 4974 4975 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4976 instruction_count(3); 4977 src1 : S3(read); 4978 src2 : S3(read); 4979 mem : S4(read); 4980 DECODE : S0; // any decoder for FPU PUSH 4981 D0 : S0(2); // big decoder only 4982 FPU : S4; 4983 MEM : S3(2); // any mem 4984 %} 4985 4986 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4987 instruction_count(2); 4988 src1 : S3(read); 4989 dst : S4(read); 4990 D0 : S0(2); // big decoder only 4991 MEM : S3(2); // any mem 4992 %} 4993 4994 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4995 instruction_count(3); 4996 src1 : S3(read); 4997 src2 : S3(read); 4998 dst : S4(read); 4999 D0 : S0(3); // big decoder only 5000 FPU : S4; 5001 MEM : S3(3); // any mem 5002 %} 5003 5004 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 5005 instruction_count(3); 5006 src1 : S4(read); 5007 mem : S4(read); 5008 DECODE : S0; // any decoder for FPU PUSH 5009 D0 : S0(2); // big decoder only 5010 FPU : S4; 5011 MEM : S3(2); // any mem 5012 %} 5013 5014 // Float load constant 5015 pipe_class fpu_reg_con(regDPR dst) %{ 5016 instruction_count(2); 5017 dst : S5(write); 5018 D0 : S0; // big decoder only for the load 5019 DECODE : S1; // any decoder for FPU POP 5020 FPU : S4; 5021 MEM : S3; // any mem 5022 %} 5023 5024 // Float load constant 5025 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5026 instruction_count(3); 5027 dst : S5(write); 5028 src : S3(read); 5029 D0 : S0; // big decoder only for 
the load 5030 DECODE : S1(2); // any decoder for FPU POP 5031 FPU : S4; 5032 MEM : S3; // any mem 5033 %} 5034 5035 // UnConditional branch 5036 pipe_class pipe_jmp( label labl ) %{ 5037 single_instruction; 5038 BR : S3; 5039 %} 5040 5041 // Conditional branch 5042 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5043 single_instruction; 5044 cr : S1(read); 5045 BR : S3; 5046 %} 5047 5048 // Allocation idiom 5049 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5050 instruction_count(1); force_serialization; 5051 fixed_latency(6); 5052 heap_ptr : S3(read); 5053 DECODE : S0(3); 5054 D0 : S2; 5055 MEM : S3; 5056 ALU : S3(2); 5057 dst : S5(write); 5058 BR : S5; 5059 %} 5060 5061 // Generic big/slow expanded idiom 5062 pipe_class pipe_slow( ) %{ 5063 instruction_count(10); multiple_bundles; force_serialization; 5064 fixed_latency(100); 5065 D0 : S0(2); 5066 MEM : S3(2); 5067 %} 5068 5069 // The real do-nothing guy 5070 pipe_class empty( ) %{ 5071 instruction_count(0); 5072 %} 5073 5074 // Define the class for the Nop node 5075 define %{ 5076 MachNop = empty; 5077 %} 5078 5079 %} 5080 5081 //----------INSTRUCTIONS------------------------------------------------------- 5082 // 5083 // match -- States which machine-independent subtree may be replaced 5084 // by this instruction. 5085 // ins_cost -- The estimated cost of this instruction is used by instruction 5086 // selection to identify a minimum cost tree of machine 5087 // instructions that matches a tree of machine-independent 5088 // instructions. 5089 // format -- A string providing the disassembly for this instruction. 5090 // The value of an instruction's operand may be inserted 5091 // by referring to it with a '$' prefix. 5092 // opcode -- Three instruction opcodes may be provided. These are referred 5093 // to within an encode class as $primary, $secondary, and $tertiary 5094 // respectively. 
The primary opcode is commonly used to 5095 // indicate the type of machine instruction, while secondary 5096 // and tertiary are often used for prefix options or addressing 5097 // modes. 5098 // ins_encode -- A list of encode classes with parameters. The encode class 5099 // name must have been defined in an 'enc_class' specification 5100 // in the encode section of the architecture description. 5101 5102 //----------BSWAP-Instruction-------------------------------------------------- 5103 instruct bytes_reverse_int(rRegI dst) %{ 5104 match(Set dst (ReverseBytesI dst)); 5105 5106 format %{ "BSWAP $dst" %} 5107 opcode(0x0F, 0xC8); 5108 ins_encode( OpcP, OpcSReg(dst) ); 5109 ins_pipe( ialu_reg ); 5110 %} 5111 5112 instruct bytes_reverse_long(eRegL dst) %{ 5113 match(Set dst (ReverseBytesL dst)); 5114 5115 format %{ "BSWAP $dst.lo\n\t" 5116 "BSWAP $dst.hi\n\t" 5117 "XCHG $dst.lo $dst.hi" %} 5118 5119 ins_cost(125); 5120 ins_encode( bswap_long_bytes(dst) ); 5121 ins_pipe( ialu_reg_reg); 5122 %} 5123 5124 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5125 match(Set dst (ReverseBytesUS dst)); 5126 effect(KILL cr); 5127 5128 format %{ "BSWAP $dst\n\t" 5129 "SHR $dst,16\n\t" %} 5130 ins_encode %{ 5131 __ bswapl($dst$$Register); 5132 __ shrl($dst$$Register, 16); 5133 %} 5134 ins_pipe( ialu_reg ); 5135 %} 5136 5137 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5138 match(Set dst (ReverseBytesS dst)); 5139 effect(KILL cr); 5140 5141 format %{ "BSWAP $dst\n\t" 5142 "SAR $dst,16\n\t" %} 5143 ins_encode %{ 5144 __ bswapl($dst$$Register); 5145 __ sarl($dst$$Register, 16); 5146 %} 5147 ins_pipe( ialu_reg ); 5148 %} 5149 5150 5151 //---------- Zeros Count Instructions ------------------------------------------ 5152 5153 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5154 predicate(UseCountLeadingZerosInstruction); 5155 match(Set dst (CountLeadingZerosI src)); 5156 effect(KILL cr); 5157 5158 format %{ "LZCNT $dst, $src\t# count 
leading zeros (int)" %} 5159 ins_encode %{ 5160 __ lzcntl($dst$$Register, $src$$Register); 5161 %} 5162 ins_pipe(ialu_reg); 5163 %} 5164 5165 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5166 predicate(!UseCountLeadingZerosInstruction); 5167 match(Set dst (CountLeadingZerosI src)); 5168 effect(KILL cr); 5169 5170 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5171 "JNZ skip\n\t" 5172 "MOV $dst, -1\n" 5173 "skip:\n\t" 5174 "NEG $dst\n\t" 5175 "ADD $dst, 31" %} 5176 ins_encode %{ 5177 Register Rdst = $dst$$Register; 5178 Register Rsrc = $src$$Register; 5179 Label skip; 5180 __ bsrl(Rdst, Rsrc); 5181 __ jccb(Assembler::notZero, skip); 5182 __ movl(Rdst, -1); 5183 __ bind(skip); 5184 __ negl(Rdst); 5185 __ addl(Rdst, BitsPerInt - 1); 5186 %} 5187 ins_pipe(ialu_reg); 5188 %} 5189 5190 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5191 predicate(UseCountLeadingZerosInstruction); 5192 match(Set dst (CountLeadingZerosL src)); 5193 effect(TEMP dst, KILL cr); 5194 5195 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5196 "JNC done\n\t" 5197 "LZCNT $dst, $src.lo\n\t" 5198 "ADD $dst, 32\n" 5199 "done:" %} 5200 ins_encode %{ 5201 Register Rdst = $dst$$Register; 5202 Register Rsrc = $src$$Register; 5203 Label done; 5204 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5205 __ jccb(Assembler::carryClear, done); 5206 __ lzcntl(Rdst, Rsrc); 5207 __ addl(Rdst, BitsPerInt); 5208 __ bind(done); 5209 %} 5210 ins_pipe(ialu_reg); 5211 %} 5212 5213 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5214 predicate(!UseCountLeadingZerosInstruction); 5215 match(Set dst (CountLeadingZerosL src)); 5216 effect(TEMP dst, KILL cr); 5217 5218 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5219 "JZ msw_is_zero\n\t" 5220 "ADD $dst, 32\n\t" 5221 "JMP not_zero\n" 5222 "msw_is_zero:\n\t" 5223 "BSR $dst, $src.lo\n\t" 5224 "JNZ not_zero\n\t" 5225 "MOV $dst, -1\n" 5226 "not_zero:\n\t" 5227 "NEG 
$dst\n\t" 5228 "ADD $dst, 63\n" %} 5229 ins_encode %{ 5230 Register Rdst = $dst$$Register; 5231 Register Rsrc = $src$$Register; 5232 Label msw_is_zero; 5233 Label not_zero; 5234 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5235 __ jccb(Assembler::zero, msw_is_zero); 5236 __ addl(Rdst, BitsPerInt); 5237 __ jmpb(not_zero); 5238 __ bind(msw_is_zero); 5239 __ bsrl(Rdst, Rsrc); 5240 __ jccb(Assembler::notZero, not_zero); 5241 __ movl(Rdst, -1); 5242 __ bind(not_zero); 5243 __ negl(Rdst); 5244 __ addl(Rdst, BitsPerLong - 1); 5245 %} 5246 ins_pipe(ialu_reg); 5247 %} 5248 5249 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5250 predicate(UseCountTrailingZerosInstruction); 5251 match(Set dst (CountTrailingZerosI src)); 5252 effect(KILL cr); 5253 5254 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5255 ins_encode %{ 5256 __ tzcntl($dst$$Register, $src$$Register); 5257 %} 5258 ins_pipe(ialu_reg); 5259 %} 5260 5261 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5262 predicate(!UseCountTrailingZerosInstruction); 5263 match(Set dst (CountTrailingZerosI src)); 5264 effect(KILL cr); 5265 5266 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5267 "JNZ done\n\t" 5268 "MOV $dst, 32\n" 5269 "done:" %} 5270 ins_encode %{ 5271 Register Rdst = $dst$$Register; 5272 Label done; 5273 __ bsfl(Rdst, $src$$Register); 5274 __ jccb(Assembler::notZero, done); 5275 __ movl(Rdst, BitsPerInt); 5276 __ bind(done); 5277 %} 5278 ins_pipe(ialu_reg); 5279 %} 5280 5281 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5282 predicate(UseCountTrailingZerosInstruction); 5283 match(Set dst (CountTrailingZerosL src)); 5284 effect(TEMP dst, KILL cr); 5285 5286 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5287 "JNC done\n\t" 5288 "TZCNT $dst, $src.hi\n\t" 5289 "ADD $dst, 32\n" 5290 "done:" %} 5291 ins_encode %{ 5292 Register Rdst = $dst$$Register; 5293 Register Rsrc = $src$$Register; 5294 Label done; 5295 __ 
tzcntl(Rdst, Rsrc); 5296 __ jccb(Assembler::carryClear, done); 5297 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5298 __ addl(Rdst, BitsPerInt); 5299 __ bind(done); 5300 %} 5301 ins_pipe(ialu_reg); 5302 %} 5303 5304 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5305 predicate(!UseCountTrailingZerosInstruction); 5306 match(Set dst (CountTrailingZerosL src)); 5307 effect(TEMP dst, KILL cr); 5308 5309 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5310 "JNZ done\n\t" 5311 "BSF $dst, $src.hi\n\t" 5312 "JNZ msw_not_zero\n\t" 5313 "MOV $dst, 32\n" 5314 "msw_not_zero:\n\t" 5315 "ADD $dst, 32\n" 5316 "done:" %} 5317 ins_encode %{ 5318 Register Rdst = $dst$$Register; 5319 Register Rsrc = $src$$Register; 5320 Label msw_not_zero; 5321 Label done; 5322 __ bsfl(Rdst, Rsrc); 5323 __ jccb(Assembler::notZero, done); 5324 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5325 __ jccb(Assembler::notZero, msw_not_zero); 5326 __ movl(Rdst, BitsPerInt); 5327 __ bind(msw_not_zero); 5328 __ addl(Rdst, BitsPerInt); 5329 __ bind(done); 5330 %} 5331 ins_pipe(ialu_reg); 5332 %} 5333 5334 5335 //---------- Population Count Instructions ------------------------------------- 5336 5337 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5338 predicate(UsePopCountInstruction); 5339 match(Set dst (PopCountI src)); 5340 effect(KILL cr); 5341 5342 format %{ "POPCNT $dst, $src" %} 5343 ins_encode %{ 5344 __ popcntl($dst$$Register, $src$$Register); 5345 %} 5346 ins_pipe(ialu_reg); 5347 %} 5348 5349 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5350 predicate(UsePopCountInstruction); 5351 match(Set dst (PopCountI (LoadI mem))); 5352 effect(KILL cr); 5353 5354 format %{ "POPCNT $dst, $mem" %} 5355 ins_encode %{ 5356 __ popcntl($dst$$Register, $mem$$Address); 5357 %} 5358 ins_pipe(ialu_reg); 5359 %} 5360 5361 // Note: Long.bitCount(long) returns an int. 
5362 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 5363 predicate(UsePopCountInstruction); 5364 match(Set dst (PopCountL src)); 5365 effect(KILL cr, TEMP tmp, TEMP dst); 5366 5367 format %{ "POPCNT $dst, $src.lo\n\t" 5368 "POPCNT $tmp, $src.hi\n\t" 5369 "ADD $dst, $tmp" %} 5370 ins_encode %{ 5371 __ popcntl($dst$$Register, $src$$Register); 5372 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 5373 __ addl($dst$$Register, $tmp$$Register); 5374 %} 5375 ins_pipe(ialu_reg); 5376 %} 5377 5378 // Note: Long.bitCount(long) returns an int. 5379 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ 5380 predicate(UsePopCountInstruction); 5381 match(Set dst (PopCountL (LoadL mem))); 5382 effect(KILL cr, TEMP tmp, TEMP dst); 5383 5384 format %{ "POPCNT $dst, $mem\n\t" 5385 "POPCNT $tmp, $mem+4\n\t" 5386 "ADD $dst, $tmp" %} 5387 ins_encode %{ 5388 //__ popcntl($dst$$Register, $mem$$Address$$first); 5389 //__ popcntl($tmp$$Register, $mem$$Address$$second); 5390 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); 5391 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); 5392 __ addl($dst$$Register, $tmp$$Register); 5393 %} 5394 ins_pipe(ialu_reg); 5395 %} 5396 5397 5398 //----------Load/Store/Move Instructions--------------------------------------- 5399 //----------Load Instructions-------------------------------------------------- 5400 // Load Byte (8bit signed) 5401 instruct loadB(xRegI dst, memory mem) %{ 5402 match(Set dst (LoadB mem)); 5403 5404 ins_cost(125); 5405 format %{ "MOVSX8 $dst,$mem\t# byte" %} 5406 5407 ins_encode %{ 5408 __ movsbl($dst$$Register, $mem$$Address); 5409 %} 5410 5411 ins_pipe(ialu_reg_mem); 5412 %} 5413 5414 // Load Byte (8bit signed) into Long Register 5415 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5416 match(Set dst (ConvI2L (LoadB mem))); 5417 effect(KILL 
cr); 5418 5419 ins_cost(375); 5420 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5421 "MOV $dst.hi,$dst.lo\n\t" 5422 "SAR $dst.hi,7" %} 5423 5424 ins_encode %{ 5425 __ movsbl($dst$$Register, $mem$$Address); 5426 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5427 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5428 %} 5429 5430 ins_pipe(ialu_reg_mem); 5431 %} 5432 5433 // Load Unsigned Byte (8bit UNsigned) 5434 instruct loadUB(xRegI dst, memory mem) %{ 5435 match(Set dst (LoadUB mem)); 5436 5437 ins_cost(125); 5438 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5439 5440 ins_encode %{ 5441 __ movzbl($dst$$Register, $mem$$Address); 5442 %} 5443 5444 ins_pipe(ialu_reg_mem); 5445 %} 5446 5447 // Load Unsigned Byte (8 bit UNsigned) into Long Register 5448 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5449 match(Set dst (ConvI2L (LoadUB mem))); 5450 effect(KILL cr); 5451 5452 ins_cost(250); 5453 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5454 "XOR $dst.hi,$dst.hi" %} 5455 5456 ins_encode %{ 5457 Register Rdst = $dst$$Register; 5458 __ movzbl(Rdst, $mem$$Address); 5459 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5460 %} 5461 5462 ins_pipe(ialu_reg_mem); 5463 %} 5464 5465 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5466 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5467 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5468 effect(KILL cr); 5469 5470 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5471 "XOR $dst.hi,$dst.hi\n\t" 5472 "AND $dst.lo,right_n_bits($mask, 8)" %} 5473 ins_encode %{ 5474 Register Rdst = $dst$$Register; 5475 __ movzbl(Rdst, $mem$$Address); 5476 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5477 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5478 %} 5479 ins_pipe(ialu_reg_mem); 5480 %} 5481 5482 // Load Short (16bit signed) 5483 instruct loadS(rRegI 
dst, memory mem) %{ 5484 match(Set dst (LoadS mem)); 5485 5486 ins_cost(125); 5487 format %{ "MOVSX $dst,$mem\t# short" %} 5488 5489 ins_encode %{ 5490 __ movswl($dst$$Register, $mem$$Address); 5491 %} 5492 5493 ins_pipe(ialu_reg_mem); 5494 %} 5495 5496 // Load Short (16 bit signed) to Byte (8 bit signed) 5497 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5498 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5499 5500 ins_cost(125); 5501 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5502 ins_encode %{ 5503 __ movsbl($dst$$Register, $mem$$Address); 5504 %} 5505 ins_pipe(ialu_reg_mem); 5506 %} 5507 5508 // Load Short (16bit signed) into Long Register 5509 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5510 match(Set dst (ConvI2L (LoadS mem))); 5511 effect(KILL cr); 5512 5513 ins_cost(375); 5514 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5515 "MOV $dst.hi,$dst.lo\n\t" 5516 "SAR $dst.hi,15" %} 5517 5518 ins_encode %{ 5519 __ movswl($dst$$Register, $mem$$Address); 5520 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5521 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5522 %} 5523 5524 ins_pipe(ialu_reg_mem); 5525 %} 5526 5527 // Load Unsigned Short/Char (16bit unsigned) 5528 instruct loadUS(rRegI dst, memory mem) %{ 5529 match(Set dst (LoadUS mem)); 5530 5531 ins_cost(125); 5532 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5533 5534 ins_encode %{ 5535 __ movzwl($dst$$Register, $mem$$Address); 5536 %} 5537 5538 ins_pipe(ialu_reg_mem); 5539 %} 5540 5541 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5542 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5543 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5544 5545 ins_cost(125); 5546 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5547 ins_encode %{ 5548 __ movsbl($dst$$Register, $mem$$Address); 5549 %} 5550 ins_pipe(ialu_reg_mem); 5551 %} 5552 5553 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5554 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5555 match(Set dst (ConvI2L (LoadUS mem))); 5556 effect(KILL cr); 5557 5558 ins_cost(250); 5559 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5560 "XOR $dst.hi,$dst.hi" %} 5561 5562 ins_encode %{ 5563 __ movzwl($dst$$Register, $mem$$Address); 5564 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5565 %} 5566 5567 ins_pipe(ialu_reg_mem); 5568 %} 5569 5570 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5571 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5572 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5573 effect(KILL cr); 5574 5575 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5576 "XOR $dst.hi,$dst.hi" %} 5577 ins_encode %{ 5578 Register Rdst = $dst$$Register; 5579 __ movzbl(Rdst, $mem$$Address); 5580 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5581 %} 5582 ins_pipe(ialu_reg_mem); 5583 %} 5584 5585 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5586 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5587 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5588 effect(KILL cr); 5589 5590 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5591 "XOR $dst.hi,$dst.hi\n\t" 5592 "AND $dst.lo,right_n_bits($mask, 16)" %} 5593 ins_encode %{ 5594 Register Rdst = $dst$$Register; 5595 __ movzwl(Rdst, $mem$$Address); 5596 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5597 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5598 %} 5599 ins_pipe(ialu_reg_mem); 5600 %} 5601 5602 // Load Integer 5603 instruct loadI(rRegI dst, memory mem) %{ 5604 match(Set dst (LoadI mem)); 5605 5606 ins_cost(125); 5607 format %{ "MOV $dst,$mem\t# int" %} 5608 5609 ins_encode %{ 5610 __ movl($dst$$Register, $mem$$Address); 5611 %} 5612 5613 ins_pipe(ialu_reg_mem); 5614 %} 5615 5616 // Load Integer (32 bit signed) to Byte (8 bit signed) 5617 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5618 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5619 5620 ins_cost(125); 5621 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5622 ins_encode %{ 5623 __ movsbl($dst$$Register, $mem$$Address); 5624 %} 5625 ins_pipe(ialu_reg_mem); 5626 %} 5627 5628 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5629 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5630 match(Set dst (AndI (LoadI mem) mask)); 5631 5632 ins_cost(125); 5633 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5634 ins_encode %{ 5635 __ movzbl($dst$$Register, $mem$$Address); 5636 %} 5637 ins_pipe(ialu_reg_mem); 5638 %} 5639 5640 // Load Integer (32 bit signed) to Short (16 bit signed) 5641 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5642 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5643 5644 ins_cost(125); 5645 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5646 ins_encode %{ 5647 __ movswl($dst$$Register, $mem$$Address); 5648 %} 5649 ins_pipe(ialu_reg_mem); 5650 
%} 5651 5652 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5653 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5654 match(Set dst (AndI (LoadI mem) mask)); 5655 5656 ins_cost(125); 5657 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5658 ins_encode %{ 5659 __ movzwl($dst$$Register, $mem$$Address); 5660 %} 5661 ins_pipe(ialu_reg_mem); 5662 %} 5663 5664 // Load Integer into Long Register 5665 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5666 match(Set dst (ConvI2L (LoadI mem))); 5667 effect(KILL cr); 5668 5669 ins_cost(375); 5670 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5671 "MOV $dst.hi,$dst.lo\n\t" 5672 "SAR $dst.hi,31" %} 5673 5674 ins_encode %{ 5675 __ movl($dst$$Register, $mem$$Address); 5676 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5677 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5678 %} 5679 5680 ins_pipe(ialu_reg_mem); 5681 %} 5682 5683 // Load Integer with mask 0xFF into Long Register 5684 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5685 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5686 effect(KILL cr); 5687 5688 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5689 "XOR $dst.hi,$dst.hi" %} 5690 ins_encode %{ 5691 Register Rdst = $dst$$Register; 5692 __ movzbl(Rdst, $mem$$Address); 5693 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5694 %} 5695 ins_pipe(ialu_reg_mem); 5696 %} 5697 5698 // Load Integer with mask 0xFFFF into Long Register 5699 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5700 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5701 effect(KILL cr); 5702 5703 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5704 "XOR $dst.hi,$dst.hi" %} 5705 ins_encode %{ 5706 Register Rdst = $dst$$Register; 5707 __ movzwl(Rdst, $mem$$Address); 5708 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5709 %} 5710 ins_pipe(ialu_reg_mem); 
5711 %} 5712 5713 // Load Integer with 31-bit mask into Long Register 5714 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ 5715 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5716 effect(KILL cr); 5717 5718 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" 5719 "XOR $dst.hi,$dst.hi\n\t" 5720 "AND $dst.lo,$mask" %} 5721 ins_encode %{ 5722 Register Rdst = $dst$$Register; 5723 __ movl(Rdst, $mem$$Address); 5724 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5725 __ andl(Rdst, $mask$$constant); 5726 %} 5727 ins_pipe(ialu_reg_mem); 5728 %} 5729 5730 // Load Unsigned Integer into Long Register 5731 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ 5732 match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); 5733 effect(KILL cr); 5734 5735 ins_cost(250); 5736 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" 5737 "XOR $dst.hi,$dst.hi" %} 5738 5739 ins_encode %{ 5740 __ movl($dst$$Register, $mem$$Address); 5741 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5742 %} 5743 5744 ins_pipe(ialu_reg_mem); 5745 %} 5746 5747 // Load Long. Cannot clobber address while loading, so restrict address 5748 // register to ESI 5749 instruct loadL(eRegL dst, load_long_memory mem) %{ 5750 predicate(!((LoadLNode*)n)->require_atomic_access()); 5751 match(Set dst (LoadL mem)); 5752 5753 ins_cost(250); 5754 format %{ "MOV $dst.lo,$mem\t# long\n\t" 5755 "MOV $dst.hi,$mem+4" %} 5756 5757 ins_encode %{ 5758 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); 5759 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); 5760 __ movl($dst$$Register, Amemlo); 5761 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); 5762 %} 5763 5764 ins_pipe(ialu_reg_long_mem); 5765 %} 5766 5767 // Volatile Load Long. Must be atomic, so do 64-bit FILD 5768 // then store it down to the stack and reload on the int 5769 // side. 
5770 instruct loadL_volatile(stackSlotL dst, memory mem) %{ 5771 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 5772 match(Set dst (LoadL mem)); 5773 5774 ins_cost(200); 5775 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 5776 "FISTp $dst" %} 5777 ins_encode(enc_loadL_volatile(mem,dst)); 5778 ins_pipe( fpu_reg_mem ); 5779 %} 5780 5781 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 5782 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5783 match(Set dst (LoadL mem)); 5784 effect(TEMP tmp); 5785 ins_cost(180); 5786 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5787 "MOVSD $dst,$tmp" %} 5788 ins_encode %{ 5789 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5790 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 5791 %} 5792 ins_pipe( pipe_slow ); 5793 %} 5794 5795 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 5796 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5797 match(Set dst (LoadL mem)); 5798 effect(TEMP tmp); 5799 ins_cost(160); 5800 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5801 "MOVD $dst.lo,$tmp\n\t" 5802 "PSRLQ $tmp,32\n\t" 5803 "MOVD $dst.hi,$tmp" %} 5804 ins_encode %{ 5805 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5806 __ movdl($dst$$Register, $tmp$$XMMRegister); 5807 __ psrlq($tmp$$XMMRegister, 32); 5808 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 5809 %} 5810 ins_pipe( pipe_slow ); 5811 %} 5812 5813 // Load Range 5814 instruct loadRange(rRegI dst, memory mem) %{ 5815 match(Set dst (LoadRange mem)); 5816 5817 ins_cost(125); 5818 format %{ "MOV $dst,$mem" %} 5819 opcode(0x8B); 5820 ins_encode( OpcP, RegMem(dst,mem)); 5821 ins_pipe( ialu_reg_mem ); 5822 %} 5823 5824 5825 // Load Pointer 5826 instruct loadP(eRegP dst, memory mem) %{ 5827 match(Set dst (LoadP mem)); 5828 5829 ins_cost(125); 5830 format %{ "MOV $dst,$mem" %} 5831 opcode(0x8B); 5832 ins_encode( OpcP, RegMem(dst,mem)); 5833 ins_pipe( 
ialu_reg_mem ); 5834 %} 5835 5836 // Load Klass Pointer 5837 instruct loadKlass(eRegP dst, memory mem) %{ 5838 match(Set dst (LoadKlass mem)); 5839 5840 ins_cost(125); 5841 format %{ "MOV $dst,$mem" %} 5842 opcode(0x8B); 5843 ins_encode( OpcP, RegMem(dst,mem)); 5844 ins_pipe( ialu_reg_mem ); 5845 %} 5846 5847 // Load Double 5848 instruct loadDPR(regDPR dst, memory mem) %{ 5849 predicate(UseSSE<=1); 5850 match(Set dst (LoadD mem)); 5851 5852 ins_cost(150); 5853 format %{ "FLD_D ST,$mem\n\t" 5854 "FSTP $dst" %} 5855 opcode(0xDD); /* DD /0 */ 5856 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5857 Pop_Reg_DPR(dst) ); 5858 ins_pipe( fpu_reg_mem ); 5859 %} 5860 5861 // Load Double to XMM 5862 instruct loadD(regD dst, memory mem) %{ 5863 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 5864 match(Set dst (LoadD mem)); 5865 ins_cost(145); 5866 format %{ "MOVSD $dst,$mem" %} 5867 ins_encode %{ 5868 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5869 %} 5870 ins_pipe( pipe_slow ); 5871 %} 5872 5873 instruct loadD_partial(regD dst, memory mem) %{ 5874 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 5875 match(Set dst (LoadD mem)); 5876 ins_cost(145); 5877 format %{ "MOVLPD $dst,$mem" %} 5878 ins_encode %{ 5879 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5880 %} 5881 ins_pipe( pipe_slow ); 5882 %} 5883 5884 // Load to XMM register (single-precision floating point) 5885 // MOVSS instruction 5886 instruct loadF(regF dst, memory mem) %{ 5887 predicate(UseSSE>=1); 5888 match(Set dst (LoadF mem)); 5889 ins_cost(145); 5890 format %{ "MOVSS $dst,$mem" %} 5891 ins_encode %{ 5892 __ movflt ($dst$$XMMRegister, $mem$$Address); 5893 %} 5894 ins_pipe( pipe_slow ); 5895 %} 5896 5897 // Load Float 5898 instruct loadFPR(regFPR dst, memory mem) %{ 5899 predicate(UseSSE==0); 5900 match(Set dst (LoadF mem)); 5901 5902 ins_cost(150); 5903 format %{ "FLD_S ST,$mem\n\t" 5904 "FSTP $dst" %} 5905 opcode(0xD9); /* D9 /0 */ 5906 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5907 Pop_Reg_FPR(dst) ); 5908 
ins_pipe( fpu_reg_mem ); 5909 %} 5910 5911 // Load Effective Address 5912 instruct leaP8(eRegP dst, indOffset8 mem) %{ 5913 match(Set dst mem); 5914 5915 ins_cost(110); 5916 format %{ "LEA $dst,$mem" %} 5917 opcode(0x8D); 5918 ins_encode( OpcP, RegMem(dst,mem)); 5919 ins_pipe( ialu_reg_reg_fat ); 5920 %} 5921 5922 instruct leaP32(eRegP dst, indOffset32 mem) %{ 5923 match(Set dst mem); 5924 5925 ins_cost(110); 5926 format %{ "LEA $dst,$mem" %} 5927 opcode(0x8D); 5928 ins_encode( OpcP, RegMem(dst,mem)); 5929 ins_pipe( ialu_reg_reg_fat ); 5930 %} 5931 5932 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ 5933 match(Set dst mem); 5934 5935 ins_cost(110); 5936 format %{ "LEA $dst,$mem" %} 5937 opcode(0x8D); 5938 ins_encode( OpcP, RegMem(dst,mem)); 5939 ins_pipe( ialu_reg_reg_fat ); 5940 %} 5941 5942 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ 5943 match(Set dst mem); 5944 5945 ins_cost(110); 5946 format %{ "LEA $dst,$mem" %} 5947 opcode(0x8D); 5948 ins_encode( OpcP, RegMem(dst,mem)); 5949 ins_pipe( ialu_reg_reg_fat ); 5950 %} 5951 5952 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ 5953 match(Set dst mem); 5954 5955 ins_cost(110); 5956 format %{ "LEA $dst,$mem" %} 5957 opcode(0x8D); 5958 ins_encode( OpcP, RegMem(dst,mem)); 5959 ins_pipe( ialu_reg_reg_fat ); 5960 %} 5961 5962 // Load Constant 5963 instruct loadConI(rRegI dst, immI src) %{ 5964 match(Set dst src); 5965 5966 format %{ "MOV $dst,$src" %} 5967 ins_encode( LdImmI(dst, src) ); 5968 ins_pipe( ialu_reg_fat ); 5969 %} 5970 5971 // Load Constant zero 5972 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{ 5973 match(Set dst src); 5974 effect(KILL cr); 5975 5976 ins_cost(50); 5977 format %{ "XOR $dst,$dst" %} 5978 opcode(0x33); /* + rd */ 5979 ins_encode( OpcP, RegReg( dst, dst ) ); 5980 ins_pipe( ialu_reg ); 5981 %} 5982 5983 instruct loadConP(eRegP dst, immP src) %{ 5984 match(Set dst src); 5985 5986 format %{ "MOV $dst,$src" %} 5987 opcode(0xB8); /* + rd */ 5988 
ins_encode( LdImmP(dst, src) ); 5989 ins_pipe( ialu_reg_fat ); 5990 %} 5991 5992 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ 5993 match(Set dst src); 5994 effect(KILL cr); 5995 ins_cost(200); 5996 format %{ "MOV $dst.lo,$src.lo\n\t" 5997 "MOV $dst.hi,$src.hi" %} 5998 opcode(0xB8); 5999 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); 6000 ins_pipe( ialu_reg_long_fat ); 6001 %} 6002 6003 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ 6004 match(Set dst src); 6005 effect(KILL cr); 6006 ins_cost(150); 6007 format %{ "XOR $dst.lo,$dst.lo\n\t" 6008 "XOR $dst.hi,$dst.hi" %} 6009 opcode(0x33,0x33); 6010 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); 6011 ins_pipe( ialu_reg_long ); 6012 %} 6013 6014 // The instruction usage is guarded by predicate in operand immFPR(). 6015 instruct loadConFPR(regFPR dst, immFPR con) %{ 6016 match(Set dst con); 6017 ins_cost(125); 6018 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 6019 "FSTP $dst" %} 6020 ins_encode %{ 6021 __ fld_s($constantaddress($con)); 6022 __ fstp_d($dst$$reg); 6023 %} 6024 ins_pipe(fpu_reg_con); 6025 %} 6026 6027 // The instruction usage is guarded by predicate in operand immFPR0(). 6028 instruct loadConFPR0(regFPR dst, immFPR0 con) %{ 6029 match(Set dst con); 6030 ins_cost(125); 6031 format %{ "FLDZ ST\n\t" 6032 "FSTP $dst" %} 6033 ins_encode %{ 6034 __ fldz(); 6035 __ fstp_d($dst$$reg); 6036 %} 6037 ins_pipe(fpu_reg_con); 6038 %} 6039 6040 // The instruction usage is guarded by predicate in operand immFPR1(). 6041 instruct loadConFPR1(regFPR dst, immFPR1 con) %{ 6042 match(Set dst con); 6043 ins_cost(125); 6044 format %{ "FLD1 ST\n\t" 6045 "FSTP $dst" %} 6046 ins_encode %{ 6047 __ fld1(); 6048 __ fstp_d($dst$$reg); 6049 %} 6050 ins_pipe(fpu_reg_con); 6051 %} 6052 6053 // The instruction usage is guarded by predicate in operand immF(). 
// Load a float constant from the constant table into an XMM register.
// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// XORPS of a register with itself materializes +0.0f without touching
// memory, hence the lower cost than the constant-table load above.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
// Load a double constant via the x87 stack: FLD from the constant table,
// then FSTP pops it into the target FPU stack register.
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
// FLDZ pushes 0.0 onto the x87 stack without a memory access.
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
// FLD1 pushes 1.0 onto the x87 stack without a memory access.
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// Load a double constant from the constant table into an XMM register.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// XORPD of a register with itself materializes +0.0 without a memory load.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot (int)
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load long from stack slot: two 32-bit loads (low half, then high half).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot (pointer)
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot (float, via the x87 stack: FLD then FSTP into $dst)
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot (double, via the x87 stack: FLD then FSTP into $dst)
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// Which variant is selected is controlled by AllocatePrefetchInstr.

// No prefetch available without SSE (unless AllocatePrefetchInstr==3,
// which selects PREFETCHW below): emit nothing.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short (0x66 operand-size prefix selects the 16-bit form)
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long: two 32-bit stores; only legal when the node does not
// require atomic access (see the *_volatile variants below otherwise).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer: only the low 32 bits of the long are stored.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant of the atomic volatile long store: bounce the value
// through an XMM temp so the 64-bit store is a single MOVSD.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As above, but the source is in a GPR pair: assemble the 64-bit value
// in an XMM temp (MOVD lo, MOVD hi, PUNPCKLDQ), then store with MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate (only when UseStoreImmI16 allows it)
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
// Store Pointer Immediate (see preceding comment: null pointers or
// constant oops that need no card-mark barrier).
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate (byte store into the card table)
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double from the x87 top-of-stack register
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Move double between XMM and the vector-legacy register class.
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move double between the vector-legacy register class and XMM.
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Move float between XMM and the vector-legacy register class.
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move float between the vector-legacy register class and XMM.
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store Float from the x87 top-of-stack register
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86: storing a double as m32real
// rounds it to float in the same instruction (ConvD2F folded in).
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
// The float is stored as its raw 32-bit pattern via an integer MOV.
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
// Store immediate Float value as its raw 32-bit pattern (see comment
// above; guarded by predicate in operand immF()).
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot: two 32-bit stores (lo then hi half).
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

// Acquire barrier: no instruction needed on x86 (loads are not
// reordered with other loads), so the encoding is empty.
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Release barrier: no instruction needed on x86 (stores are not
// reordered with other stores), so the encoding is empty.
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full (StoreLoad) barrier: needs a real fencing instruction; the
// assembler emits a locked add to the top of stack.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// A preceding instruction already provides the StoreLoad fencing
// (Matcher::post_store_load_barrier), so emit nothing.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------

// Both operand classes pin the value to EAX, so the conversion is a
// no-op and the encoding is empty.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move: branch-based emulation for CPUs without CMOV.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Branch-based emulation for the unsigned flag variant.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// True CMOVcc, register-register (P6 and later).
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF (unordered-compare-flags) variant expands to the unsigned form.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move with memory source (folds the LoadI).
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move with memory source, unsigned flags.
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move of a pointer.
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}
// Conditional move of a pointer, unsigned flags.
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF (unordered-compare-flags) variant expands to the unsigned form.
instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move of a double using the x87 FCMOV instruction;
// destination is the FPU top-of-stack register.
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move of a float using the x87 FCMOV instruction.
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// So for signed flags, branch around an x87 register-to-register move.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Conditional move (CMoveF/CMoveD/CMoveL) instructions.
// The x87 form (UseSSE==0) uses FCMOV-style encodings; the SSE forms have no
// hardware CMOV for XMM registers, so they branch around a register move.

instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // Fixed disassembly annotation: this moves a double (MOVSD/movdbl),
  // the previous format string said "# float".
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // Fixed disassembly annotation: this moves a double (MOVSD/movdbl),
  // the previous format string said "# float".
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long CMOV is done as two 32-bit CMOVs (lo then hi halves).
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of +1: single-byte INC (only when UseIncDec allows the partial-flags form).
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA; does not touch flags.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add of -1: single-byte DEC.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// Type-cast nodes: no code is emitted, they only affect the compiler's view.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castLL( eRegL dst ) %{
  match(Set dst (CastLL dst));
  format %{ "#castLL of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}


// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange: like CompareAndSwap but the old value (in the fixed
// accumulator register pair) is the result, not a boolean.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAdd whose result is unused: a locked ADD is cheaper than XADD.
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndSet: XCHG with a memory operand is implicitly locked, so no
// explicit LOCK prefix is emitted here.

// Important to match to xRegI: only 8-bit regs.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  // Special case handles min_jint / -1, which would otherwise trap in IDIV.
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
opcode(0xD3, 0x7); /* D3 /7 */ 8115 ins_encode( OpcP, RegOpc( dst ) ); 8116 ins_pipe( ialu_reg_reg ); 8117 %} 8118 8119 // Logical shift right by one 8120 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8121 match(Set dst (URShiftI dst shift)); 8122 effect(KILL cr); 8123 8124 size(2); 8125 format %{ "SHR $dst,$shift" %} 8126 opcode(0xD1, 0x5); /* D1 /5 */ 8127 ins_encode( OpcP, RegOpc( dst ) ); 8128 ins_pipe( ialu_reg ); 8129 %} 8130 8131 // Logical Shift Right by 8-bit immediate 8132 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8133 match(Set dst (URShiftI dst shift)); 8134 effect(KILL cr); 8135 8136 size(3); 8137 format %{ "SHR $dst,$shift" %} 8138 opcode(0xC1, 0x5); /* C1 /5 ib */ 8139 ins_encode( RegOpcImm( dst, shift) ); 8140 ins_pipe( ialu_reg ); 8141 %} 8142 8143 8144 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 8145 // This idiom is used by the compiler for the i2b bytecode. 8146 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 8147 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 8148 8149 size(3); 8150 format %{ "MOVSX $dst,$src :8" %} 8151 ins_encode %{ 8152 __ movsbl($dst$$Register, $src$$Register); 8153 %} 8154 ins_pipe(ialu_reg_reg); 8155 %} 8156 8157 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 8158 // This idiom is used by the compiler the i2s bytecode. 
// Matched as a single MOVSX (sign-extend word) instead of two shifts.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2, matched from the (x ^ -1) & y idiom.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from the (0 - x) & x idiom.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to lowest set bit, matched from (x + -1) ^ x.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matched from (x + -1) & x.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer reinterpreted as int (CastP2X).
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// Match-less building blocks used by the rolI_* expand rules below.
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Only matches when the two shift counts sum to 32 (mod 32), i.e. a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
// Match-less building blocks used by the rorI_* expand rules below.
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Only matches when the two shift counts sum to 32 (mod 32), i.e. a rotate.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// XOR with -1 is a bitwise NOT; NOT does not touch flags, so no KILL cr.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Plain register copy; building block for the convI2B/convP2B expands below.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG/ADC trick: yields 1 if src != 0, 0 if src == 0 (dst starts as src).
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer flavor of movI_nocopy for the convP2B expand.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer flavor of the NEG/ADC non-zero test.
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// Produce -1 if p < q (signed), else 0, branch-free via SETcc + NEG.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Branch-free: zero dst, set its low byte from the compare, then negate
    // so 1 becomes all-ones. (Removed an unused 'Label done' left over here.)
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// Special case p < 0: the sign bit broadcast by SAR 31 is exactly the mask.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// ((p < q) ? y : 0) + (p - q), computed as p-q then conditionally adding y.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// y = (p < q) ? y : 0, via a compare and conditional clear.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
   instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These instructions materialize the Overflow* ideal nodes: they perform the
// ALU operation purely to set the flags register (Set cr), which the matcher
// then consumes with an overflow-condition branch.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  // op1 is destroyed by the ADD — only the flags result is wanted.
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// CMP sets the same flags as SUB without clobbering op1.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 0 - x matched as NEG; overflows exactly for min_jint.
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Three-operand IMUL form: result goes into a temp, inputs stay live.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer Absolute Instructions
// Branch-free abs: mask = x >> 31 (all ones if negative); (x ^ mask) - mask.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
  %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

//----------Long Instructions------------------------------------------------
// 64-bit values live in register pairs (.lo/.hi); arithmetic is done as a
// low-word op followed by the carry-propagating high-word op (ADD/ADC etc.).

// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long (0 - dst), emitted by the neg_long encoding class.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long ANDN: applied independently to the low and high halves of the pair.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Address of the high 32-bit word (+4 bytes past the low word).
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSI: if the low-word BLSI result is non-zero, the lowest set bit is
// in the low word and the high result stays 0 (set up front); otherwise BLSI
// the high word.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32-bit word (+4 bytes past the low word).
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSMSK: low-word BLSMSK sets carry iff the low word was zero; only
// then does the mask extend into the high word.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32-bit word (+4 bytes past the low word).
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
9165 "BLSRL $dst.lo, $src.lo\n\t" 9166 "JNC done\n\t" 9167 "BLSRL $dst.hi, $src.hi\n" 9168 "done:" 9169 %} 9170 9171 ins_encode %{ 9172 Label done; 9173 Register Rdst = $dst$$Register; 9174 Register Rsrc = $src$$Register; 9175 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9176 __ blsrl(Rdst, Rsrc); 9177 __ jccb(Assembler::carryClear, done); 9178 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9179 __ bind(done); 9180 %} 9181 9182 ins_pipe(ialu_reg); 9183 %} 9184 9185 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9186 %{ 9187 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 9188 predicate(UseBMI1Instructions); 9189 effect(KILL cr, TEMP dst); 9190 9191 ins_cost(125); 9192 format %{ "MOVL $dst.hi, $src+4\n\t" 9193 "BLSRL $dst.lo, $src\n\t" 9194 "JNC done\n\t" 9195 "BLSRL $dst.hi, $src+4\n" 9196 "done:" 9197 %} 9198 9199 ins_encode %{ 9200 Label done; 9201 Register Rdst = $dst$$Register; 9202 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9203 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9204 __ blsrl(Rdst, $src$$Address); 9205 __ jccb(Assembler::carryClear, done); 9206 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9207 __ bind(done); 9208 %} 9209 9210 ins_pipe(ialu_reg_mem); 9211 %} 9212 9213 // Or Long Register with Register 9214 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9215 match(Set dst (OrL dst src)); 9216 effect(KILL cr); 9217 format %{ "OR $dst.lo,$src.lo\n\t" 9218 "OR $dst.hi,$src.hi" %} 9219 opcode(0x0B,0x0B); 9220 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9221 ins_pipe( ialu_reg_reg_long ); 9222 %} 9223 9224 // Or Long Register with Immediate 9225 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9226 match(Set dst (OrL dst src)); 9227 effect(KILL cr); 9228 format %{ "OR $dst.lo,$src.lo\n\t" 9229 "OR $dst.hi,$src.hi" %} 9230 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9231 ins_encode( Long_OpcSErm_Lo( dst, src ), 
Long_OpcSErm_Hi( dst, src ) ); 9232 ins_pipe( ialu_reg_long ); 9233 %} 9234 9235 // Or Long Register with Memory 9236 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9237 match(Set dst (OrL dst (LoadL mem))); 9238 effect(KILL cr); 9239 ins_cost(125); 9240 format %{ "OR $dst.lo,$mem\n\t" 9241 "OR $dst.hi,$mem+4" %} 9242 opcode(0x0B,0x0B); 9243 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9244 ins_pipe( ialu_reg_long_mem ); 9245 %} 9246 9247 // Xor Long Register with Register 9248 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9249 match(Set dst (XorL dst src)); 9250 effect(KILL cr); 9251 format %{ "XOR $dst.lo,$src.lo\n\t" 9252 "XOR $dst.hi,$src.hi" %} 9253 opcode(0x33,0x33); 9254 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9255 ins_pipe( ialu_reg_reg_long ); 9256 %} 9257 9258 // Xor Long Register with Immediate -1 9259 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9260 match(Set dst (XorL dst imm)); 9261 format %{ "NOT $dst.lo\n\t" 9262 "NOT $dst.hi" %} 9263 ins_encode %{ 9264 __ notl($dst$$Register); 9265 __ notl(HIGH_FROM_LOW($dst$$Register)); 9266 %} 9267 ins_pipe( ialu_reg_long ); 9268 %} 9269 9270 // Xor Long Register with Immediate 9271 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9272 match(Set dst (XorL dst src)); 9273 effect(KILL cr); 9274 format %{ "XOR $dst.lo,$src.lo\n\t" 9275 "XOR $dst.hi,$src.hi" %} 9276 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9277 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9278 ins_pipe( ialu_reg_long ); 9279 %} 9280 9281 // Xor Long Register with Memory 9282 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9283 match(Set dst (XorL dst (LoadL mem))); 9284 effect(KILL cr); 9285 ins_cost(125); 9286 format %{ "XOR $dst.lo,$mem\n\t" 9287 "XOR $dst.hi,$mem+4" %} 9288 opcode(0x33,0x33); 9289 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9290 ins_pipe( ialu_reg_long_mem ); 
9291 %} 9292 9293 // Shift Left Long by 1 9294 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9295 predicate(UseNewLongLShift); 9296 match(Set dst (LShiftL dst cnt)); 9297 effect(KILL cr); 9298 ins_cost(100); 9299 format %{ "ADD $dst.lo,$dst.lo\n\t" 9300 "ADC $dst.hi,$dst.hi" %} 9301 ins_encode %{ 9302 __ addl($dst$$Register,$dst$$Register); 9303 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9304 %} 9305 ins_pipe( ialu_reg_long ); 9306 %} 9307 9308 // Shift Left Long by 2 9309 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9310 predicate(UseNewLongLShift); 9311 match(Set dst (LShiftL dst cnt)); 9312 effect(KILL cr); 9313 ins_cost(100); 9314 format %{ "ADD $dst.lo,$dst.lo\n\t" 9315 "ADC $dst.hi,$dst.hi\n\t" 9316 "ADD $dst.lo,$dst.lo\n\t" 9317 "ADC $dst.hi,$dst.hi" %} 9318 ins_encode %{ 9319 __ addl($dst$$Register,$dst$$Register); 9320 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9321 __ addl($dst$$Register,$dst$$Register); 9322 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9323 %} 9324 ins_pipe( ialu_reg_long ); 9325 %} 9326 9327 // Shift Left Long by 3 9328 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9329 predicate(UseNewLongLShift); 9330 match(Set dst (LShiftL dst cnt)); 9331 effect(KILL cr); 9332 ins_cost(100); 9333 format %{ "ADD $dst.lo,$dst.lo\n\t" 9334 "ADC $dst.hi,$dst.hi\n\t" 9335 "ADD $dst.lo,$dst.lo\n\t" 9336 "ADC $dst.hi,$dst.hi\n\t" 9337 "ADD $dst.lo,$dst.lo\n\t" 9338 "ADC $dst.hi,$dst.hi" %} 9339 ins_encode %{ 9340 __ addl($dst$$Register,$dst$$Register); 9341 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9342 __ addl($dst$$Register,$dst$$Register); 9343 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9344 __ addl($dst$$Register,$dst$$Register); 9345 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9346 %} 9347 ins_pipe( ialu_reg_long ); 9348 %} 9349 9350 // Shift Left 
Long by 1-31 9351 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9352 match(Set dst (LShiftL dst cnt)); 9353 effect(KILL cr); 9354 ins_cost(200); 9355 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9356 "SHL $dst.lo,$cnt" %} 9357 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9358 ins_encode( move_long_small_shift(dst,cnt) ); 9359 ins_pipe( ialu_reg_long ); 9360 %} 9361 9362 // Shift Left Long by 32-63 9363 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9364 match(Set dst (LShiftL dst cnt)); 9365 effect(KILL cr); 9366 ins_cost(300); 9367 format %{ "MOV $dst.hi,$dst.lo\n" 9368 "\tSHL $dst.hi,$cnt-32\n" 9369 "\tXOR $dst.lo,$dst.lo" %} 9370 opcode(0xC1, 0x4); /* C1 /4 ib */ 9371 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9372 ins_pipe( ialu_reg_long ); 9373 %} 9374 9375 // Shift Left Long by variable 9376 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9377 match(Set dst (LShiftL dst shift)); 9378 effect(KILL cr); 9379 ins_cost(500+200); 9380 size(17); 9381 format %{ "TEST $shift,32\n\t" 9382 "JEQ,s small\n\t" 9383 "MOV $dst.hi,$dst.lo\n\t" 9384 "XOR $dst.lo,$dst.lo\n" 9385 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9386 "SHL $dst.lo,$shift" %} 9387 ins_encode( shift_left_long( dst, shift ) ); 9388 ins_pipe( pipe_slow ); 9389 %} 9390 9391 // Shift Right Long by 1-31 9392 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9393 match(Set dst (URShiftL dst cnt)); 9394 effect(KILL cr); 9395 ins_cost(200); 9396 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9397 "SHR $dst.hi,$cnt" %} 9398 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9399 ins_encode( move_long_small_shift(dst,cnt) ); 9400 ins_pipe( ialu_reg_long ); 9401 %} 9402 9403 // Shift Right Long by 32-63 9404 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9405 match(Set dst (URShiftL dst cnt)); 9406 effect(KILL cr); 9407 ins_cost(300); 9408 format %{ "MOV $dst.lo,$dst.hi\n" 9409 "\tSHR $dst.lo,$cnt-32\n" 9410 "\tXOR 
$dst.hi,$dst.hi" %} 9411 opcode(0xC1, 0x5); /* C1 /5 ib */ 9412 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9413 ins_pipe( ialu_reg_long ); 9414 %} 9415 9416 // Shift Right Long by variable 9417 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9418 match(Set dst (URShiftL dst shift)); 9419 effect(KILL cr); 9420 ins_cost(600); 9421 size(17); 9422 format %{ "TEST $shift,32\n\t" 9423 "JEQ,s small\n\t" 9424 "MOV $dst.lo,$dst.hi\n\t" 9425 "XOR $dst.hi,$dst.hi\n" 9426 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9427 "SHR $dst.hi,$shift" %} 9428 ins_encode( shift_right_long( dst, shift ) ); 9429 ins_pipe( pipe_slow ); 9430 %} 9431 9432 // Shift Right Long by 1-31 9433 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9434 match(Set dst (RShiftL dst cnt)); 9435 effect(KILL cr); 9436 ins_cost(200); 9437 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9438 "SAR $dst.hi,$cnt" %} 9439 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9440 ins_encode( move_long_small_shift(dst,cnt) ); 9441 ins_pipe( ialu_reg_long ); 9442 %} 9443 9444 // Shift Right Long by 32-63 9445 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9446 match(Set dst (RShiftL dst cnt)); 9447 effect(KILL cr); 9448 ins_cost(300); 9449 format %{ "MOV $dst.lo,$dst.hi\n" 9450 "\tSAR $dst.lo,$cnt-32\n" 9451 "\tSAR $dst.hi,31" %} 9452 opcode(0xC1, 0x7); /* C1 /7 ib */ 9453 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9454 ins_pipe( ialu_reg_long ); 9455 %} 9456 9457 // Shift Right arithmetic Long by variable 9458 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9459 match(Set dst (RShiftL dst shift)); 9460 effect(KILL cr); 9461 ins_cost(600); 9462 size(18); 9463 format %{ "TEST $shift,32\n\t" 9464 "JEQ,s small\n\t" 9465 "MOV $dst.lo,$dst.hi\n\t" 9466 "SAR $dst.hi,31\n" 9467 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9468 "SAR $dst.hi,$shift" %} 9469 ins_encode( shift_right_arith_long( dst, shift ) ); 9470 ins_pipe( pipe_slow ); 9471 %} 9472 9473 
//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 form: FUCOMIP writes EFLAGS directly; the fix-up folds the
// unordered (NaN) outcome into the "less than" result.
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// P6 form without the NaN fix-up, for flag consumers that accept the
// raw unordered flag pattern (eFlagsRegUCF).
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Pre-P6 compare: the FPU status word is pulled through AX and SAHF'd
// into EFLAGS; unordered is treated as "less than".
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Three-way compare against 0.0 producing -1/0/1 in a GP register.
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Three-way register-register compare producing -1/0/1.
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// XMM compare writing EFLAGS, with the post-UCOMISD NaN fix-up.
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// XMM compare without the fix-up, for carry-flag-only consumers.
instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// XMM-vs-memory compare with the NaN fix-up.
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// XMM-vs-memory compare without the fix-up.
instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Three-way XMM compare producing -1/0/1.
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Three-way XMM-vs-memory compare producing -1/0/1.
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// x87 double subtract, register-register.
instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 subtract with an explicit store-and-round to a stack slot.
instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DSUB ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}

// x87 double subtract from memory.
instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// FABS on the top of the FPU stack (src and dst are both TOS).
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// FCHS on the top of the FPU stack.
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// x87 double add, register-register.
instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 add with store-and-round to a stack slot.
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}

// x87 double add from memory.
instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory: read-modify-write add straight into the memory slot.
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}

// Add the constant 1.0 via FLD1 -- no constant-table access needed.
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// Add a general double constant from the constant table
// (0.0 and 1.0 are carved out for cheaper encodings elsewhere).
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Constant add with store-and-round to a stack slot.
instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}

// x87 double multiply, register-register.
instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (MulD dst src));
  ins_cost(1); // Select this instruction for all strict FP double multiplies

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Multiply by a general double constant from the constant table.
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// x87 double multiply from memory.
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
// MACRO3: multiply-accumulate into src2 (2-address form).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO3 -- subDPR a mulDPR
// Same 2-address trick for (src0 * src1) - src2; the reverse-subtract
// encoding (DE E0+i, FSUBRP) makes the operand order work out.
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}

// x87 double divide for non-strict methods (see strictfp variant below).
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // FIX: this rule previously carried a second, bare "predicate(UseSSE<=1);"
  // clause ahead of the match. An instruct takes a single predicate, and the
  // bare form would also have claimed non-strict divides away from
  // divDPR_reg; only the strict-mode test is kept.
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(01); // always preferred over divDPR_reg in strict methods

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Divide with store-and-round to a stack slot; excluded for strict
// methods, which must take the biased path above.
instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD $src1\n\t"
            "FDIV ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}

// x87 double remainder; the FPREM loop is produced by emitModDPR().
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 double remainder: operands are bounced through the stack into the
// x87 unit for the FPREM loop, then the result is moved back to XMM.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

// x87 FPATAN (D9 F3).
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// FPATAN for SSE2 operands, staged through the x87 stack.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// x87 FSQRT (D9 FA).
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
//     fcompp();
//     fwait(); fnstsw_ax();
//     sahf();
//     movl(dst, unordered_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//   exit:

// P6
version of float compare, sets condition codes in EFLAGS 10076 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10077 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10078 match(Set cr (CmpF src1 src2)); 10079 effect(KILL rax); 10080 ins_cost(150); 10081 format %{ "FLD $src1\n\t" 10082 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10083 "JNP exit\n\t" 10084 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10085 "SAHF\n" 10086 "exit:\tNOP // avoid branch to branch" %} 10087 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10088 ins_encode( Push_Reg_DPR(src1), 10089 OpcP, RegOpc(src2), 10090 cmpF_P6_fixup ); 10091 ins_pipe( pipe_slow ); 10092 %} 10093 10094 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10095 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10096 match(Set cr (CmpF src1 src2)); 10097 ins_cost(100); 10098 format %{ "FLD $src1\n\t" 10099 "FUCOMIP ST,$src2 // P6 instruction" %} 10100 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10101 ins_encode( Push_Reg_DPR(src1), 10102 OpcP, RegOpc(src2)); 10103 ins_pipe( pipe_slow ); 10104 %} 10105 10106 10107 // Compare & branch 10108 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10109 predicate(UseSSE == 0); 10110 match(Set cr (CmpF src1 src2)); 10111 effect(KILL rax); 10112 ins_cost(200); 10113 format %{ "FLD $src1\n\t" 10114 "FCOMp $src2\n\t" 10115 "FNSTSW AX\n\t" 10116 "TEST AX,0x400\n\t" 10117 "JZ,s flags\n\t" 10118 "MOV AH,1\t# unordered treat as LT\n" 10119 "flags:\tSAHF" %} 10120 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10121 ins_encode( Push_Reg_DPR(src1), 10122 OpcP, RegOpc(src2), 10123 fpu_flags); 10124 ins_pipe( pipe_slow ); 10125 %} 10126 10127 // Compare vs zero into -1,0,1 10128 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10129 predicate(UseSSE == 0); 10130 match(Set dst (CmpF3 src1 zero)); 10131 effect(KILL cr, KILL rax); 10132 ins_cost(280); 10133 format %{ "FTSTF 
$dst,$src1" %} 10134 opcode(0xE4, 0xD9); 10135 ins_encode( Push_Reg_DPR(src1), 10136 OpcS, OpcP, PopFPU, 10137 CmpF_Result(dst)); 10138 ins_pipe( pipe_slow ); 10139 %} 10140 10141 // Compare into -1,0,1 10142 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10143 predicate(UseSSE == 0); 10144 match(Set dst (CmpF3 src1 src2)); 10145 effect(KILL cr, KILL rax); 10146 ins_cost(300); 10147 format %{ "FCMPF $dst,$src1,$src2" %} 10148 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10149 ins_encode( Push_Reg_DPR(src1), 10150 OpcP, RegOpc(src2), 10151 CmpF_Result(dst)); 10152 ins_pipe( pipe_slow ); 10153 %} 10154 10155 // float compare and set condition codes in EFLAGS by XMM regs 10156 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10157 predicate(UseSSE>=1); 10158 match(Set cr (CmpF src1 src2)); 10159 ins_cost(145); 10160 format %{ "UCOMISS $src1,$src2\n\t" 10161 "JNP,s exit\n\t" 10162 "PUSHF\t# saw NaN, set CF\n\t" 10163 "AND [rsp], #0xffffff2b\n\t" 10164 "POPF\n" 10165 "exit:" %} 10166 ins_encode %{ 10167 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10168 emit_cmpfp_fixup(_masm); 10169 %} 10170 ins_pipe( pipe_slow ); 10171 %} 10172 10173 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10174 predicate(UseSSE>=1); 10175 match(Set cr (CmpF src1 src2)); 10176 ins_cost(100); 10177 format %{ "UCOMISS $src1,$src2" %} 10178 ins_encode %{ 10179 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10180 %} 10181 ins_pipe( pipe_slow ); 10182 %} 10183 10184 // float compare and set condition codes in EFLAGS by XMM regs 10185 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10186 predicate(UseSSE>=1); 10187 match(Set cr (CmpF src1 (LoadF src2))); 10188 ins_cost(165); 10189 format %{ "UCOMISS $src1,$src2\n\t" 10190 "JNP,s exit\n\t" 10191 "PUSHF\t# saw NaN, set CF\n\t" 10192 "AND [rsp], #0xffffff2b\n\t" 10193 "POPF\n" 10194 "exit:" %} 10195 ins_encode %{ 10196 __ ucomiss($src1$$XMMRegister, $src2$$Address); 
10197 emit_cmpfp_fixup(_masm); 10198 %} 10199 ins_pipe( pipe_slow ); 10200 %} 10201 10202 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10203 predicate(UseSSE>=1); 10204 match(Set cr (CmpF src1 (LoadF src2))); 10205 ins_cost(100); 10206 format %{ "UCOMISS $src1,$src2" %} 10207 ins_encode %{ 10208 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10209 %} 10210 ins_pipe( pipe_slow ); 10211 %} 10212 10213 // Compare into -1,0,1 in XMM 10214 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10215 predicate(UseSSE>=1); 10216 match(Set dst (CmpF3 src1 src2)); 10217 effect(KILL cr); 10218 ins_cost(255); 10219 format %{ "UCOMISS $src1, $src2\n\t" 10220 "MOV $dst, #-1\n\t" 10221 "JP,s done\n\t" 10222 "JB,s done\n\t" 10223 "SETNE $dst\n\t" 10224 "MOVZB $dst, $dst\n" 10225 "done:" %} 10226 ins_encode %{ 10227 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10228 emit_cmpfp3(_masm, $dst$$Register); 10229 %} 10230 ins_pipe( pipe_slow ); 10231 %} 10232 10233 // Compare into -1,0,1 in XMM and memory 10234 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10235 predicate(UseSSE>=1); 10236 match(Set dst (CmpF3 src1 (LoadF src2))); 10237 effect(KILL cr); 10238 ins_cost(275); 10239 format %{ "UCOMISS $src1, $src2\n\t" 10240 "MOV $dst, #-1\n\t" 10241 "JP,s done\n\t" 10242 "JB,s done\n\t" 10243 "SETNE $dst\n\t" 10244 "MOVZB $dst, $dst\n" 10245 "done:" %} 10246 ins_encode %{ 10247 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10248 emit_cmpfp3(_masm, $dst$$Register); 10249 %} 10250 ins_pipe( pipe_slow ); 10251 %} 10252 10253 // Spill to obtain 24-bit precision 10254 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10255 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10256 match(Set dst (SubF src1 src2)); 10257 10258 format %{ "FSUB $dst,$src1 - $src2" %} 10259 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10260 ins_encode( Push_Reg_FPR(src1), 10261 
OpcReg_FPR(src2), 10262 Pop_Mem_FPR(dst) ); 10263 ins_pipe( fpu_mem_reg_reg ); 10264 %} 10265 // 10266 // This instruction does not round to 24-bits 10267 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10268 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10269 match(Set dst (SubF dst src)); 10270 10271 format %{ "FSUB $dst,$src" %} 10272 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10273 ins_encode( Push_Reg_FPR(src), 10274 OpcP, RegOpc(dst) ); 10275 ins_pipe( fpu_reg_reg ); 10276 %} 10277 10278 // Spill to obtain 24-bit precision 10279 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10280 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10281 match(Set dst (AddF src1 src2)); 10282 10283 format %{ "FADD $dst,$src1,$src2" %} 10284 opcode(0xD8, 0x0); /* D8 C0+i */ 10285 ins_encode( Push_Reg_FPR(src2), 10286 OpcReg_FPR(src1), 10287 Pop_Mem_FPR(dst) ); 10288 ins_pipe( fpu_mem_reg_reg ); 10289 %} 10290 // 10291 // This instruction does not round to 24-bits 10292 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10293 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10294 match(Set dst (AddF dst src)); 10295 10296 format %{ "FLD $src\n\t" 10297 "FADDp $dst,ST" %} 10298 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10299 ins_encode( Push_Reg_FPR(src), 10300 OpcP, RegOpc(dst) ); 10301 ins_pipe( fpu_reg_reg ); 10302 %} 10303 10304 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10305 predicate(UseSSE==0); 10306 match(Set dst (AbsF src)); 10307 ins_cost(100); 10308 format %{ "FABS" %} 10309 opcode(0xE1, 0xD9); 10310 ins_encode( OpcS, OpcP ); 10311 ins_pipe( fpu_reg_reg ); 10312 %} 10313 10314 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10315 predicate(UseSSE==0); 10316 match(Set dst (NegF src)); 10317 ins_cost(100); 10318 format %{ "FCHS" %} 10319 opcode(0xE0, 0xD9); 10320 ins_encode( OpcS, OpcP ); 10321 ins_pipe( fpu_reg_reg ); 10322 %} 10323 10324 // Cisc-alternate to addFPR_reg 10325 // Spill to 
obtain 24-bit precision 10326 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10327 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10328 match(Set dst (AddF src1 (LoadF src2))); 10329 10330 format %{ "FLD $src2\n\t" 10331 "FADD ST,$src1\n\t" 10332 "FSTP_S $dst" %} 10333 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10334 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10335 OpcReg_FPR(src1), 10336 Pop_Mem_FPR(dst) ); 10337 ins_pipe( fpu_mem_reg_mem ); 10338 %} 10339 // 10340 // Cisc-alternate to addFPR_reg 10341 // This instruction does not round to 24-bits 10342 instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10343 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10344 match(Set dst (AddF dst (LoadF src))); 10345 10346 format %{ "FADD $dst,$src" %} 10347 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10348 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10349 OpcP, RegOpc(dst) ); 10350 ins_pipe( fpu_reg_mem ); 10351 %} 10352 10353 // // Following two instructions for _222_mpegaudio 10354 // Spill to obtain 24-bit precision 10355 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10356 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10357 match(Set dst (AddF src1 src2)); 10358 10359 format %{ "FADD $dst,$src1,$src2" %} 10360 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10361 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10362 OpcReg_FPR(src2), 10363 Pop_Mem_FPR(dst) ); 10364 ins_pipe( fpu_mem_reg_mem ); 10365 %} 10366 10367 // Cisc-spill variant 10368 // Spill to obtain 24-bit precision 10369 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10370 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10371 match(Set dst (AddF src1 (LoadF src2))); 10372 10373 format %{ "FADD $dst,$src1,$src2 cisc" %} 10374 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10375 
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10376 set_instruction_start, 10377 OpcP, RMopc_Mem(secondary,src1), 10378 Pop_Mem_FPR(dst) ); 10379 ins_pipe( fpu_mem_mem_mem ); 10380 %} 10381 10382 // Spill to obtain 24-bit precision 10383 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10384 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10385 match(Set dst (AddF src1 src2)); 10386 10387 format %{ "FADD $dst,$src1,$src2" %} 10388 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10389 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10390 set_instruction_start, 10391 OpcP, RMopc_Mem(secondary,src1), 10392 Pop_Mem_FPR(dst) ); 10393 ins_pipe( fpu_mem_mem_mem ); 10394 %} 10395 10396 10397 // Spill to obtain 24-bit precision 10398 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10399 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10400 match(Set dst (AddF src con)); 10401 format %{ "FLD $src\n\t" 10402 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10403 "FSTP_S $dst" %} 10404 ins_encode %{ 10405 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10406 __ fadd_s($constantaddress($con)); 10407 __ fstp_s(Address(rsp, $dst$$disp)); 10408 %} 10409 ins_pipe(fpu_mem_reg_con); 10410 %} 10411 // 10412 // This instruction does not round to 24-bits 10413 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10414 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10415 match(Set dst (AddF src con)); 10416 format %{ "FLD $src\n\t" 10417 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10418 "FSTP $dst" %} 10419 ins_encode %{ 10420 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10421 __ fadd_s($constantaddress($con)); 10422 __ fstp_d($dst$$reg); 10423 %} 10424 ins_pipe(fpu_reg_reg_con); 10425 %} 10426 10427 // Spill to obtain 24-bit precision 10428 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10429 
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10430 match(Set dst (MulF src1 src2)); 10431 10432 format %{ "FLD $src1\n\t" 10433 "FMUL $src2\n\t" 10434 "FSTP_S $dst" %} 10435 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10436 ins_encode( Push_Reg_FPR(src1), 10437 OpcReg_FPR(src2), 10438 Pop_Mem_FPR(dst) ); 10439 ins_pipe( fpu_mem_reg_reg ); 10440 %} 10441 // 10442 // This instruction does not round to 24-bits 10443 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10444 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10445 match(Set dst (MulF src1 src2)); 10446 10447 format %{ "FLD $src1\n\t" 10448 "FMUL $src2\n\t" 10449 "FSTP_S $dst" %} 10450 opcode(0xD8, 0x1); /* D8 C8+i */ 10451 ins_encode( Push_Reg_FPR(src2), 10452 OpcReg_FPR(src1), 10453 Pop_Reg_FPR(dst) ); 10454 ins_pipe( fpu_reg_reg_reg ); 10455 %} 10456 10457 10458 // Spill to obtain 24-bit precision 10459 // Cisc-alternate to reg-reg multiply 10460 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10461 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10462 match(Set dst (MulF src1 (LoadF src2))); 10463 10464 format %{ "FLD_S $src2\n\t" 10465 "FMUL $src1\n\t" 10466 "FSTP_S $dst" %} 10467 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10468 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10469 OpcReg_FPR(src1), 10470 Pop_Mem_FPR(dst) ); 10471 ins_pipe( fpu_mem_reg_mem ); 10472 %} 10473 // 10474 // This instruction does not round to 24-bits 10475 // Cisc-alternate to reg-reg multiply 10476 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10477 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10478 match(Set dst (MulF src1 (LoadF src2))); 10479 10480 format %{ "FMUL $dst,$src1,$src2" %} 10481 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10482 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10483 OpcReg_FPR(src1), 10484 
Pop_Reg_FPR(dst) ); 10485 ins_pipe( fpu_reg_reg_mem ); 10486 %} 10487 10488 // Spill to obtain 24-bit precision 10489 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10490 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10491 match(Set dst (MulF src1 src2)); 10492 10493 format %{ "FMUL $dst,$src1,$src2" %} 10494 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10495 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10496 set_instruction_start, 10497 OpcP, RMopc_Mem(secondary,src1), 10498 Pop_Mem_FPR(dst) ); 10499 ins_pipe( fpu_mem_mem_mem ); 10500 %} 10501 10502 // Spill to obtain 24-bit precision 10503 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10504 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10505 match(Set dst (MulF src con)); 10506 10507 format %{ "FLD $src\n\t" 10508 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10509 "FSTP_S $dst" %} 10510 ins_encode %{ 10511 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10512 __ fmul_s($constantaddress($con)); 10513 __ fstp_s(Address(rsp, $dst$$disp)); 10514 %} 10515 ins_pipe(fpu_mem_reg_con); 10516 %} 10517 // 10518 // This instruction does not round to 24-bits 10519 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10520 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10521 match(Set dst (MulF src con)); 10522 10523 format %{ "FLD $src\n\t" 10524 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10525 "FSTP $dst" %} 10526 ins_encode %{ 10527 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10528 __ fmul_s($constantaddress($con)); 10529 __ fstp_d($dst$$reg); 10530 %} 10531 ins_pipe(fpu_reg_reg_con); 10532 %} 10533 10534 10535 // 10536 // MACRO1 -- subsume unshared load into mulFPR 10537 // This instruction does not round to 24-bits 10538 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10539 predicate(UseSSE==0 && 
!Compile::current()->select_24_bit_instr()); 10540 match(Set dst (MulF (LoadF mem1) src)); 10541 10542 format %{ "FLD $mem1 ===MACRO1===\n\t" 10543 "FMUL ST,$src\n\t" 10544 "FSTP $dst" %} 10545 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10546 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10547 OpcReg_FPR(src), 10548 Pop_Reg_FPR(dst) ); 10549 ins_pipe( fpu_reg_reg_mem ); 10550 %} 10551 // 10552 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10553 // This instruction does not round to 24-bits 10554 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10555 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10556 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10557 ins_cost(95); 10558 10559 format %{ "FLD $mem1 ===MACRO2===\n\t" 10560 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10561 "FADD ST,$src2\n\t" 10562 "FSTP $dst" %} 10563 opcode(0xD9); /* LoadF D9 /0 */ 10564 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10565 FMul_ST_reg(src1), 10566 FAdd_ST_reg(src2), 10567 Pop_Reg_FPR(dst) ); 10568 ins_pipe( fpu_reg_mem_reg_reg ); 10569 %} 10570 10571 // MACRO3 -- addFPR a mulFPR 10572 // This instruction does not round to 24-bits. It is a '2-address' 10573 // instruction in that the result goes back to src2. This eliminates 10574 // a move from the macro; possibly the register allocator will have 10575 // to add it back (and maybe not). 
10576 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ 10577 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10578 match(Set src2 (AddF (MulF src0 src1) src2)); 10579 10580 format %{ "FLD $src0 ===MACRO3===\n\t" 10581 "FMUL ST,$src1\n\t" 10582 "FADDP $src2,ST" %} 10583 opcode(0xD9); /* LoadF D9 /0 */ 10584 ins_encode( Push_Reg_FPR(src0), 10585 FMul_ST_reg(src1), 10586 FAddP_reg_ST(src2) ); 10587 ins_pipe( fpu_reg_reg_reg ); 10588 %} 10589 10590 // MACRO4 -- divFPR subFPR 10591 // This instruction does not round to 24-bits 10592 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ 10593 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10594 match(Set dst (DivF (SubF src2 src1) src3)); 10595 10596 format %{ "FLD $src2 ===MACRO4===\n\t" 10597 "FSUB ST,$src1\n\t" 10598 "FDIV ST,$src3\n\t" 10599 "FSTP $dst" %} 10600 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10601 ins_encode( Push_Reg_FPR(src2), 10602 subFPR_divFPR_encode(src1,src3), 10603 Pop_Reg_FPR(dst) ); 10604 ins_pipe( fpu_reg_reg_reg_reg ); 10605 %} 10606 10607 // Spill to obtain 24-bit precision 10608 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10609 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10610 match(Set dst (DivF src1 src2)); 10611 10612 format %{ "FDIV $dst,$src1,$src2" %} 10613 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 10614 ins_encode( Push_Reg_FPR(src1), 10615 OpcReg_FPR(src2), 10616 Pop_Mem_FPR(dst) ); 10617 ins_pipe( fpu_mem_reg_reg ); 10618 %} 10619 // 10620 // This instruction does not round to 24-bits 10621 instruct divFPR_reg(regFPR dst, regFPR src) %{ 10622 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10623 match(Set dst (DivF dst src)); 10624 10625 format %{ "FDIV $dst,$src" %} 10626 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10627 ins_encode( Push_Reg_FPR(src), 10628 OpcP, RegOpc(dst) ); 10629 ins_pipe( fpu_reg_reg ); 10630 %} 10631 10632 10633 
// Spill to obtain 24-bit precision 10634 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10635 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10636 match(Set dst (ModF src1 src2)); 10637 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10638 10639 format %{ "FMOD $dst,$src1,$src2" %} 10640 ins_encode( Push_Reg_Mod_DPR(src1, src2), 10641 emitModDPR(), 10642 Push_Result_Mod_DPR(src2), 10643 Pop_Mem_FPR(dst)); 10644 ins_pipe( pipe_slow ); 10645 %} 10646 // 10647 // This instruction does not round to 24-bits 10648 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ 10649 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10650 match(Set dst (ModF dst src)); 10651 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10652 10653 format %{ "FMOD $dst,$src" %} 10654 ins_encode(Push_Reg_Mod_DPR(dst, src), 10655 emitModDPR(), 10656 Push_Result_Mod_DPR(src), 10657 Pop_Reg_FPR(dst)); 10658 ins_pipe( pipe_slow ); 10659 %} 10660 10661 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ 10662 predicate(UseSSE>=1); 10663 match(Set dst (ModF src0 src1)); 10664 effect(KILL rax, KILL cr); 10665 format %{ "SUB ESP,4\t # FMOD\n" 10666 "\tMOVSS [ESP+0],$src1\n" 10667 "\tFLD_S [ESP+0]\n" 10668 "\tMOVSS [ESP+0],$src0\n" 10669 "\tFLD_S [ESP+0]\n" 10670 "loop:\tFPREM\n" 10671 "\tFWAIT\n" 10672 "\tFNSTSW AX\n" 10673 "\tSAHF\n" 10674 "\tJP loop\n" 10675 "\tFSTP_S [ESP+0]\n" 10676 "\tMOVSS $dst,[ESP+0]\n" 10677 "\tADD ESP,4\n" 10678 "\tFSTP ST0\t # Restore FPU Stack" 10679 %} 10680 ins_cost(250); 10681 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); 10682 ins_pipe( pipe_slow ); 10683 %} 10684 10685 10686 //----------Arithmetic Conversion Instructions--------------------------------- 10687 // The conversions operations are all Alpha sorted. Please keep it that way! 
10688 10689 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ 10690 predicate(UseSSE==0); 10691 match(Set dst (RoundFloat src)); 10692 ins_cost(125); 10693 format %{ "FST_S $dst,$src\t# F-round" %} 10694 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 10695 ins_pipe( fpu_mem_reg ); 10696 %} 10697 10698 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ 10699 predicate(UseSSE<=1); 10700 match(Set dst (RoundDouble src)); 10701 ins_cost(125); 10702 format %{ "FST_D $dst,$src\t# D-round" %} 10703 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 10704 ins_pipe( fpu_mem_reg ); 10705 %} 10706 10707 // Force rounding to 24-bit precision and 6-bit exponent 10708 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ 10709 predicate(UseSSE==0); 10710 match(Set dst (ConvD2F src)); 10711 format %{ "FST_S $dst,$src\t# F-round" %} 10712 expand %{ 10713 roundFloat_mem_reg(dst,src); 10714 %} 10715 %} 10716 10717 // Force rounding to 24-bit precision and 6-bit exponent 10718 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ 10719 predicate(UseSSE==1); 10720 match(Set dst (ConvD2F src)); 10721 effect( KILL cr ); 10722 format %{ "SUB ESP,4\n\t" 10723 "FST_S [ESP],$src\t# F-round\n\t" 10724 "MOVSS $dst,[ESP]\n\t" 10725 "ADD ESP,4" %} 10726 ins_encode %{ 10727 __ subptr(rsp, 4); 10728 if ($src$$reg != FPR1L_enc) { 10729 __ fld_s($src$$reg-1); 10730 __ fstp_s(Address(rsp, 0)); 10731 } else { 10732 __ fst_s(Address(rsp, 0)); 10733 } 10734 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 10735 __ addptr(rsp, 4); 10736 %} 10737 ins_pipe( pipe_slow ); 10738 %} 10739 10740 // Force rounding double precision to single precision 10741 instruct convD2F_reg(regF dst, regD src) %{ 10742 predicate(UseSSE>=2); 10743 match(Set dst (ConvD2F src)); 10744 format %{ "CVTSD2SS $dst,$src\t# F-round" %} 10745 ins_encode %{ 10746 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); 10747 %} 10748 ins_pipe( pipe_slow ); 10749 %} 10750 10751 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ 
10752 predicate(UseSSE==0); 10753 match(Set dst (ConvF2D src)); 10754 format %{ "FST_S $dst,$src\t# D-round" %} 10755 ins_encode( Pop_Reg_Reg_DPR(dst, src)); 10756 ins_pipe( fpu_reg_reg ); 10757 %} 10758 10759 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ 10760 predicate(UseSSE==1); 10761 match(Set dst (ConvF2D src)); 10762 format %{ "FST_D $dst,$src\t# D-round" %} 10763 expand %{ 10764 roundDouble_mem_reg(dst,src); 10765 %} 10766 %} 10767 10768 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ 10769 predicate(UseSSE==1); 10770 match(Set dst (ConvF2D src)); 10771 effect( KILL cr ); 10772 format %{ "SUB ESP,4\n\t" 10773 "MOVSS [ESP] $src\n\t" 10774 "FLD_S [ESP]\n\t" 10775 "ADD ESP,4\n\t" 10776 "FSTP $dst\t# D-round" %} 10777 ins_encode %{ 10778 __ subptr(rsp, 4); 10779 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10780 __ fld_s(Address(rsp, 0)); 10781 __ addptr(rsp, 4); 10782 __ fstp_d($dst$$reg); 10783 %} 10784 ins_pipe( pipe_slow ); 10785 %} 10786 10787 instruct convF2D_reg(regD dst, regF src) %{ 10788 predicate(UseSSE>=2); 10789 match(Set dst (ConvF2D src)); 10790 format %{ "CVTSS2SD $dst,$src\t# D-round" %} 10791 ins_encode %{ 10792 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); 10793 %} 10794 ins_pipe( pipe_slow ); 10795 %} 10796 10797 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
10798 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ 10799 predicate(UseSSE<=1); 10800 match(Set dst (ConvD2I src)); 10801 effect( KILL tmp, KILL cr ); 10802 format %{ "FLD $src\t# Convert double to int \n\t" 10803 "FLDCW trunc mode\n\t" 10804 "SUB ESP,4\n\t" 10805 "FISTp [ESP + #0]\n\t" 10806 "FLDCW std/24-bit mode\n\t" 10807 "POP EAX\n\t" 10808 "CMP EAX,0x80000000\n\t" 10809 "JNE,s fast\n\t" 10810 "FLD_D $src\n\t" 10811 "CALL d2i_wrapper\n" 10812 "fast:" %} 10813 ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); 10814 ins_pipe( pipe_slow ); 10815 %} 10816 10817 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10818 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 10819 predicate(UseSSE>=2); 10820 match(Set dst (ConvD2I src)); 10821 effect( KILL tmp, KILL cr ); 10822 format %{ "CVTTSD2SI $dst, $src\n\t" 10823 "CMP $dst,0x80000000\n\t" 10824 "JNE,s fast\n\t" 10825 "SUB ESP, 8\n\t" 10826 "MOVSD [ESP], $src\n\t" 10827 "FLD_D [ESP]\n\t" 10828 "ADD ESP, 8\n\t" 10829 "CALL d2i_wrapper\n" 10830 "fast:" %} 10831 ins_encode %{ 10832 Label fast; 10833 __ cvttsd2sil($dst$$Register, $src$$XMMRegister); 10834 __ cmpl($dst$$Register, 0x80000000); 10835 __ jccb(Assembler::notEqual, fast); 10836 __ subptr(rsp, 8); 10837 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10838 __ fld_d(Address(rsp, 0)); 10839 __ addptr(rsp, 8); 10840 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); 10841 __ bind(fast); 10842 %} 10843 ins_pipe( pipe_slow ); 10844 %} 10845 10846 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ 10847 predicate(UseSSE<=1); 10848 match(Set dst (ConvD2L src)); 10849 effect( KILL cr ); 10850 format %{ "FLD $src\t# Convert double to long\n\t" 10851 "FLDCW trunc mode\n\t" 10852 "SUB ESP,8\n\t" 10853 "FISTp [ESP + #0]\n\t" 10854 "FLDCW std/24-bit mode\n\t" 10855 "POP EAX\n\t" 10856 "POP EDX\n\t" 10857 "CMP 
EDX,0x80000000\n\t" 10858 "JNE,s fast\n\t" 10859 "TEST EAX,EAX\n\t" 10860 "JNE,s fast\n\t" 10861 "FLD $src\n\t" 10862 "CALL d2l_wrapper\n" 10863 "fast:" %} 10864 ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); 10865 ins_pipe( pipe_slow ); 10866 %} 10867 10868 // XMM lacks a float/double->long conversion, so use the old FPU stack. 10869 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 10870 predicate (UseSSE>=2); 10871 match(Set dst (ConvD2L src)); 10872 effect( KILL cr ); 10873 format %{ "SUB ESP,8\t# Convert double to long\n\t" 10874 "MOVSD [ESP],$src\n\t" 10875 "FLD_D [ESP]\n\t" 10876 "FLDCW trunc mode\n\t" 10877 "FISTp [ESP + #0]\n\t" 10878 "FLDCW std/24-bit mode\n\t" 10879 "POP EAX\n\t" 10880 "POP EDX\n\t" 10881 "CMP EDX,0x80000000\n\t" 10882 "JNE,s fast\n\t" 10883 "TEST EAX,EAX\n\t" 10884 "JNE,s fast\n\t" 10885 "SUB ESP,8\n\t" 10886 "MOVSD [ESP],$src\n\t" 10887 "FLD_D [ESP]\n\t" 10888 "ADD ESP,8\n\t" 10889 "CALL d2l_wrapper\n" 10890 "fast:" %} 10891 ins_encode %{ 10892 Label fast; 10893 __ subptr(rsp, 8); 10894 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10895 __ fld_d(Address(rsp, 0)); 10896 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); 10897 __ fistp_d(Address(rsp, 0)); 10898 // Restore the rounding mode, mask the exception 10899 if (Compile::current()->in_24_bit_fp_mode()) { 10900 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 10901 } else { 10902 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 10903 } 10904 // Load the converted long, adjust CPU stack 10905 __ pop(rax); 10906 __ pop(rdx); 10907 __ cmpl(rdx, 0x80000000); 10908 __ jccb(Assembler::notEqual, fast); 10909 __ testl(rax, rax); 10910 __ jccb(Assembler::notEqual, fast); 10911 __ subptr(rsp, 8); 10912 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10913 __ fld_d(Address(rsp, 0)); 10914 __ addptr(rsp, 8); 10915 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); 10916 __ bind(fast); 
10917 %} 10918 ins_pipe( pipe_slow ); 10919 %} 10920 10921 // Convert a double to an int. Java semantics require we do complex 10922 // manglations in the corner cases. So we set the rounding mode to 10923 // 'zero', store the darned double down as an int, and reset the 10924 // rounding mode to 'nearest'. The hardware stores a flag value down 10925 // if we would overflow or converted a NAN; we check for this and 10926 // and go the slow path if needed. 10927 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ 10928 predicate(UseSSE==0); 10929 match(Set dst (ConvF2I src)); 10930 effect( KILL tmp, KILL cr ); 10931 format %{ "FLD $src\t# Convert float to int \n\t" 10932 "FLDCW trunc mode\n\t" 10933 "SUB ESP,4\n\t" 10934 "FISTp [ESP + #0]\n\t" 10935 "FLDCW std/24-bit mode\n\t" 10936 "POP EAX\n\t" 10937 "CMP EAX,0x80000000\n\t" 10938 "JNE,s fast\n\t" 10939 "FLD $src\n\t" 10940 "CALL d2i_wrapper\n" 10941 "fast:" %} 10942 // DPR2I_encoding works for FPR2I 10943 ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); 10944 ins_pipe( pipe_slow ); 10945 %} 10946 10947 // Convert a float in xmm to an int reg. 
10948 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 10949 predicate(UseSSE>=1); 10950 match(Set dst (ConvF2I src)); 10951 effect( KILL tmp, KILL cr ); 10952 format %{ "CVTTSS2SI $dst, $src\n\t" 10953 "CMP $dst,0x80000000\n\t" 10954 "JNE,s fast\n\t" 10955 "SUB ESP, 4\n\t" 10956 "MOVSS [ESP], $src\n\t" 10957 "FLD [ESP]\n\t" 10958 "ADD ESP, 4\n\t" 10959 "CALL d2i_wrapper\n" 10960 "fast:" %} 10961 ins_encode %{ 10962 Label fast; 10963 __ cvttss2sil($dst$$Register, $src$$XMMRegister); 10964 __ cmpl($dst$$Register, 0x80000000); 10965 __ jccb(Assembler::notEqual, fast); 10966 __ subptr(rsp, 4); 10967 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10968 __ fld_s(Address(rsp, 0)); 10969 __ addptr(rsp, 4); 10970 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); 10971 __ bind(fast); 10972 %} 10973 ins_pipe( pipe_slow ); 10974 %} 10975 10976 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ 10977 predicate(UseSSE==0); 10978 match(Set dst (ConvF2L src)); 10979 effect( KILL cr ); 10980 format %{ "FLD $src\t# Convert float to long\n\t" 10981 "FLDCW trunc mode\n\t" 10982 "SUB ESP,8\n\t" 10983 "FISTp [ESP + #0]\n\t" 10984 "FLDCW std/24-bit mode\n\t" 10985 "POP EAX\n\t" 10986 "POP EDX\n\t" 10987 "CMP EDX,0x80000000\n\t" 10988 "JNE,s fast\n\t" 10989 "TEST EAX,EAX\n\t" 10990 "JNE,s fast\n\t" 10991 "FLD $src\n\t" 10992 "CALL d2l_wrapper\n" 10993 "fast:" %} 10994 // DPR2L_encoding works for FPR2L 10995 ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); 10996 ins_pipe( pipe_slow ); 10997 %} 10998 10999 // XMM lacks a float/double->long conversion, so use the old FPU stack. 
11000 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 11001 predicate (UseSSE>=1); 11002 match(Set dst (ConvF2L src)); 11003 effect( KILL cr ); 11004 format %{ "SUB ESP,8\t# Convert float to long\n\t" 11005 "MOVSS [ESP],$src\n\t" 11006 "FLD_S [ESP]\n\t" 11007 "FLDCW trunc mode\n\t" 11008 "FISTp [ESP + #0]\n\t" 11009 "FLDCW std/24-bit mode\n\t" 11010 "POP EAX\n\t" 11011 "POP EDX\n\t" 11012 "CMP EDX,0x80000000\n\t" 11013 "JNE,s fast\n\t" 11014 "TEST EAX,EAX\n\t" 11015 "JNE,s fast\n\t" 11016 "SUB ESP,4\t# Convert float to long\n\t" 11017 "MOVSS [ESP],$src\n\t" 11018 "FLD_S [ESP]\n\t" 11019 "ADD ESP,4\n\t" 11020 "CALL d2l_wrapper\n" 11021 "fast:" %} 11022 ins_encode %{ 11023 Label fast; 11024 __ subptr(rsp, 8); 11025 __ movflt(Address(rsp, 0), $src$$XMMRegister); 11026 __ fld_s(Address(rsp, 0)); 11027 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); 11028 __ fistp_d(Address(rsp, 0)); 11029 // Restore the rounding mode, mask the exception 11030 if (Compile::current()->in_24_bit_fp_mode()) { 11031 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 11032 } else { 11033 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 11034 } 11035 // Load the converted long, adjust CPU stack 11036 __ pop(rax); 11037 __ pop(rdx); 11038 __ cmpl(rdx, 0x80000000); 11039 __ jccb(Assembler::notEqual, fast); 11040 __ testl(rax, rax); 11041 __ jccb(Assembler::notEqual, fast); 11042 __ subptr(rsp, 4); 11043 __ movflt(Address(rsp, 0), $src$$XMMRegister); 11044 __ fld_s(Address(rsp, 0)); 11045 __ addptr(rsp, 4); 11046 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); 11047 __ bind(fast); 11048 %} 11049 ins_pipe( pipe_slow ); 11050 %} 11051 11052 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ 11053 predicate( UseSSE<=1 ); 11054 match(Set dst (ConvI2D src)); 11055 format %{ "FILD $src\n\t" 11056 "FSTP $dst" %} 11057 opcode(0xDB, 0x0); /* DB /0 */ 11058 ins_encode(Push_Mem_I(src), 
Pop_Reg_DPR(dst)); 11059 ins_pipe( fpu_reg_mem ); 11060 %} 11061 11062 instruct convI2D_reg(regD dst, rRegI src) %{ 11063 predicate( UseSSE>=2 && !UseXmmI2D ); 11064 match(Set dst (ConvI2D src)); 11065 format %{ "CVTSI2SD $dst,$src" %} 11066 ins_encode %{ 11067 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); 11068 %} 11069 ins_pipe( pipe_slow ); 11070 %} 11071 11072 instruct convI2D_mem(regD dst, memory mem) %{ 11073 predicate( UseSSE>=2 ); 11074 match(Set dst (ConvI2D (LoadI mem))); 11075 format %{ "CVTSI2SD $dst,$mem" %} 11076 ins_encode %{ 11077 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); 11078 %} 11079 ins_pipe( pipe_slow ); 11080 %} 11081 11082 instruct convXI2D_reg(regD dst, rRegI src) 11083 %{ 11084 predicate( UseSSE>=2 && UseXmmI2D ); 11085 match(Set dst (ConvI2D src)); 11086 11087 format %{ "MOVD $dst,$src\n\t" 11088 "CVTDQ2PD $dst,$dst\t# i2d" %} 11089 ins_encode %{ 11090 __ movdl($dst$$XMMRegister, $src$$Register); 11091 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); 11092 %} 11093 ins_pipe(pipe_slow); // XXX 11094 %} 11095 11096 instruct convI2DPR_mem(regDPR dst, memory mem) %{ 11097 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 11098 match(Set dst (ConvI2D (LoadI mem))); 11099 format %{ "FILD $mem\n\t" 11100 "FSTP $dst" %} 11101 opcode(0xDB); /* DB /0 */ 11102 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11103 Pop_Reg_DPR(dst)); 11104 ins_pipe( fpu_reg_mem ); 11105 %} 11106 11107 // Convert a byte to a float; no rounding step needed. 
// Convert a byte-range int (value & 255) to float on x87; the value fits in
// 24-bit precision exactly, so no extra rounding store is required.  The
// predicate pattern-matches "AndI src 255" in the ideal graph.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Alternate int-to-float via MOVD + CVTDQ2PS (selected by UseXmmI2F).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend convert int to long: copy into both halves, then arithmetic
// shift of the high half replicates the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Convert long to double via x87 FILD of the pushed 64-bit value
// (no-SSE2 configurations); result is rounded to double by the FSTP_D.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to double into XMM: x87 FILD/FSTP through the stack,
// then MOVSD into the XMM destination.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to float into XMM, analogous to convL2D_reg but with a
// single-precision store/reload.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Convert long to float to a stack slot (x87 path, no predicate: used when
// the SSE variants above don't apply); F-round via FSTP_S.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to int: just copy the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Reinterpret float bits as int: plain 32-bit load from the stack slot.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Reinterpret x87 float as int by storing the raw 32 bits to a stack slot.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Reinterpret XMM float as int via MOVSS to a stack slot.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret XMM float as int register-to-register via MOVD (cheapest form).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret int bits as float: plain 32-bit store to the stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// Reinterpret a stack-slot int as an x87 float via a raw FLD_S.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Reinterpret a stack-slot int as an XMM float via MOVSS.
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret int as XMM float register-to-register via MOVD.
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret a stack-slot double as a long: two 32-bit loads into the
// low/high register pair.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// Reinterpret an x87 double as a long by storing the raw 64 bits to a slot.
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Reinterpret an XMM double as a long via MOVSD to a stack slot.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret an XMM double as a long register pair: MOVD the low word,
// then shuffle the high word down (PSHUFLW 0x4E swaps 32-bit halves)
// and MOVD it into the high register.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret a long register pair as a stack-slot double: two 32-bit stores.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// Reinterpret a stack-slot long as an x87 double via a raw FLD_D.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// Reinterpret a stack-slot long as an XMM double via MOVSD
// (clears the upper half of the register; gated on UseXmmLoadAndClearUpper).
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Same as above for chips where the partial (MOVLPD-style) load is preferred.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret a long register pair as an XMM double: MOVD each half into
// XMM registers, then PUNPCKLDQ interleaves them into the 64-bit value.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // false => not known-large; clear_mem picks short vs long path at runtime.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

// Variant of rep_stos for arrays the compiler already knows are large;
// skips the short-length fast path.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // true => known-large array.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true);
  %}
  ins_pipe( pipe_slow );
%}

// String compare intrinsic, Latin1 vs Latin1 encoding.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// String compare intrinsic, UTF-16 vs UTF-16 encoding.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// String compare intrinsic, Latin1 vs UTF-16 encoding.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}

// String compare intrinsic, UTF-16 vs Latin1: note the operands are swapped
// (both in the register operands and in the call) so the stub's LU code
// can be reused for the UL case.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}

  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// Constant-size substring search, UTF-16 haystack / Latin1 needle.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Variable-size substring search, Latin1/Latin1 ((-1) = count not constant).
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// Variable-size substring search, UTF-16/UTF-16.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Variable-size substring search, UTF-16 haystack / Latin1 needle.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Search for a single char in a UTF-16 string (SSE4.2 PCMPESTRI-based stub).
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // First arg true => operands are arrays (length is loaded from headers).
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals, char (UTF-16) elements
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// Test whether a byte[] contains any negative bytes (non-ASCII detection).
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Signed compare of a register against an immediate.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero: TEST reg,reg is shorter than CMP reg,0.
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fused (AndI src con) compared to zero => TEST reg,imm.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fused (AndI src mem) compared to zero => TEST reg,mem.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
12121 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{ 12122 match(Set cr (CmpP (LoadP op) zero)); 12123 12124 format %{ "TEST $op,0xFFFFFFFF" %} 12125 ins_cost(500); 12126 opcode(0xF7); /* Opcode F7 /0 */ 12127 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); 12128 ins_pipe( ialu_cr_reg_imm ); 12129 %} 12130 12131 // Yanked all unsigned pointer compare operations. 12132 // Pointer compares are done with CmpP which is already unsigned. 12133 12134 //----------Max and Min-------------------------------------------------------- 12135 // Min Instructions 12136 //// 12137 // *** Min and Max using the conditional move are slower than the 12138 // *** branch version on a Pentium III. 12139 // // Conditional move for min 12140 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 12141 // effect( USE_DEF op2, USE op1, USE cr ); 12142 // format %{ "CMOVlt $op2,$op1\t! min" %} 12143 // opcode(0x4C,0x0F); 12144 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 12145 // ins_pipe( pipe_cmov_reg ); 12146 //%} 12147 // 12148 //// Min Register with Register (P6 version) 12149 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 12150 // predicate(VM_Version::supports_cmov() ); 12151 // match(Set op2 (MinI op1 op2)); 12152 // ins_cost(200); 12153 // expand %{ 12154 // eFlagsReg cr; 12155 // compI_eReg(cr,op1,op2); 12156 // cmovI_reg_lt(op2,op1,cr); 12157 // %} 12158 //%} 12159 12160 // Min Register with Register (generic version) 12161 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 12162 match(Set dst (MinI dst src)); 12163 effect(KILL flags); 12164 ins_cost(300); 12165 12166 format %{ "MIN $dst,$src" %} 12167 opcode(0xCC); 12168 ins_encode( min_enc(dst,src) ); 12169 ins_pipe( pipe_slow ); 12170 %} 12171 12172 // Max Register with Register 12173 // *** Min and Max using the conditional move are slower than the 12174 // *** branch version on a Pentium III. 
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): removed the unused local 'int m1 = (strd > 0) ? 1 : -1;'
    // it was never referenced below and only triggered unused-variable warnings.
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
12467 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{ 12468 match(Set result (PartialSubtypeCheck sub super)); 12469 effect( KILL rcx, KILL cr ); 12470 12471 ins_cost(1100); // slightly larger than the next version 12472 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12473 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12474 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12475 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12476 "JNE,s miss\t\t# Missed: EDI not-zero\n\t" 12477 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" 12478 "XOR $result,$result\t\t Hit: EDI zero\n\t" 12479 "miss:\t" %} 12480 12481 opcode(0x1); // Force a XOR of EDI 12482 ins_encode( enc_PartialSubtypeCheck() ); 12483 ins_pipe( pipe_slow ); 12484 %} 12485 12486 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{ 12487 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 12488 effect( KILL rcx, KILL result ); 12489 12490 ins_cost(1000); 12491 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12492 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12493 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12494 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12495 "JNE,s miss\t\t# Missed: flags NZ\n\t" 12496 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" 12497 "miss:\t" %} 12498 12499 opcode(0x0); // No need to XOR EDI 12500 ins_encode( enc_PartialSubtypeCheck() ); 12501 ins_pipe( pipe_slow ); 12502 %} 12503 12504 // ============================================================================ 12505 // Branch Instructions -- short offset versions 12506 // 12507 // These instructions are used to replace jumps of a long offset (the default 12508 
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
12682 // This is the test to avoid. 12683 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{ 12684 match(Set dst (CmpL3 src1 src2)); 12685 effect( KILL flags ); 12686 ins_cost(1000); 12687 format %{ "XOR $dst,$dst\n\t" 12688 "CMP $src1.hi,$src2.hi\n\t" 12689 "JLT,s m_one\n\t" 12690 "JGT,s p_one\n\t" 12691 "CMP $src1.lo,$src2.lo\n\t" 12692 "JB,s m_one\n\t" 12693 "JEQ,s done\n" 12694 "p_one:\tINC $dst\n\t" 12695 "JMP,s done\n" 12696 "m_one:\tDEC $dst\n" 12697 "done:" %} 12698 ins_encode %{ 12699 Label p_one, m_one, done; 12700 __ xorptr($dst$$Register, $dst$$Register); 12701 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register)); 12702 __ jccb(Assembler::less, m_one); 12703 __ jccb(Assembler::greater, p_one); 12704 __ cmpl($src1$$Register, $src2$$Register); 12705 __ jccb(Assembler::below, m_one); 12706 __ jccb(Assembler::equal, done); 12707 __ bind(p_one); 12708 __ incrementl($dst$$Register); 12709 __ jmpb(done); 12710 __ bind(m_one); 12711 __ decrementl($dst$$Register); 12712 __ bind(done); 12713 %} 12714 ins_pipe( pipe_slow ); 12715 %} 12716 12717 //====== 12718 // Manifest a CmpL result in the normal flags. Only good for LT or GE 12719 // compares. Can be used for LE or GT compares by reversing arguments. 12720 // NOT GOOD FOR EQ/NE tests. 12721 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ 12722 match( Set flags (CmpL src zero )); 12723 ins_cost(100); 12724 format %{ "TEST $src.hi,$src.hi" %} 12725 opcode(0x85); 12726 ins_encode( OpcP, RegReg_Hi2( src, src ) ); 12727 ins_pipe( ialu_cr_reg_reg ); 12728 %} 12729 12730 // Manifest a CmpL result in the normal flags. Only good for LT or GE 12731 // compares. Can be used for LE or GT compares by reversing arguments. 12732 // NOT GOOD FOR EQ/NE tests. 
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
12817 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{ 12818 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12819 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 12820 ins_cost(200); 12821 format %{ "CMOV$cmp $dst,$src" %} 12822 opcode(0x0F,0x40); 12823 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12824 ins_pipe( pipe_cmov_reg ); 12825 %} 12826 12827 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{ 12828 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12829 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 12830 ins_cost(250); 12831 format %{ "CMOV$cmp $dst,$src" %} 12832 opcode(0x0F,0x40); 12833 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 12834 ins_pipe( pipe_cmov_mem ); 12835 %} 12836 12837 // Compare 2 longs and CMOVE ints. 
// Compare 2 longs (flags set by a signed LT/GE long compare) and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 form, used when SSE2 is unavailable).
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  // Fixed: parenthesize the BoolTest disjunction. Without parentheses,
  // && binds tighter than ||, so "UseSSE<=1 && lt || ge" matched even when
  // UseSSE > 1 whenever the test was 'ge'. Matches the integer cmov rules.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form).
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  // Fixed: parenthesized disjunction (see cmovDDPR_reg_LTGE).
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, used when SSE is unavailable).
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  // Fixed: parenthesized disjunction (see cmovDDPR_reg_LTGE).
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE form).
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  // Fixed: parenthesized disjunction (see cmovDDPR_reg_LTGE).
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// OR of the two halves is zero iff the whole long is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compares the low halves, and only compares the high halves if the lows
// were already equal.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Zero test: OR of the two halves is zero iff the whole unsigned long is zero.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Compares the low halves, and only compares the high halves if the lows
// were already equal.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Conditionally move a 64-bit value as two 32-bit CMOVs (lo then hi);
// flags come from an EQ/NE long compare.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above but the long source is loaded from memory.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Conditionally move an int; flags come from an EQ/NE long compare.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above but the source is loaded from memory.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Compare 2 longs (EQ/NE) and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 form, used when SSE2 is unavailable).
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  // Fixed: parenthesize the BoolTest disjunction. Without parentheses,
  // && binds tighter than ||, so "UseSSE<=1 && eq || ne" matched even when
  // UseSSE > 1 whenever the test was 'ne'. Matches the integer cmov rules.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form).
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // Fixed: parenthesized disjunction (see cmovDDPR_reg_EQNE).
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, used when SSE is unavailable).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  // Fixed: parenthesized disjunction (see cmovDDPR_reg_EQNE).
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE form).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // Fixed: parenthesized disjunction (see cmovDDPR_reg_EQNE).
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only the gt/le tests are valid with the commuted-compare flags.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only the gt/le tests are valid with the commuted-compare flags.
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above but the long source is loaded from memory.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Conditionally move an int; flags come from a commuted LE/GT long compare.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above but the source is loaded from memory.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Compare 2 longs (commuted LE/GT) and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 form, used when SSE2 is unavailable).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  // Fixed: parenthesize the BoolTest disjunction. Without parentheses,
  // && binds tighter than ||, so "UseSSE<=1 && le || gt" matched even when
  // UseSSE > 1 whenever the test was 'gt'. Matches the integer cmov rules.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // Fixed: parenthesized disjunction (see cmovDDPR_reg_LEGT).
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, used when SSE is unavailable).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  // Fixed: parenthesized disjunction (see cmovDDPR_reg_LEGT).
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


// Compare 2 longs and CMOVE floats (SSE form).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // Fixed: parenthesized disjunction (see cmovDDPR_reg_LEGT).
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // EAX is pre-loaded with a placeholder inline-cache oop (-1); the
  // call site is presumably patched later — see Java_Dynamic_Call.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that does not touch the FPU state, so no float-stack cleanup.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // The stale return address is popped into EDX before the indirect jump.
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast lock variant used when Restricted Transactional Memory is enabled.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Fast lock variant used when RTM is disabled.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    // TEST EAX,[poll]: a readable poll page makes this a no-op; a guarded
    // page presumably traps to the safepoint handler — TODO confirm.
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// A load that immediately re-reads a just-stored spill slot is replaced by
// re-using the store (the loaded value is already in the stored register).
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.