//
// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
// Note: for the general registers below it matches the hardware x86
// register numbering (EAX=0, ECX=1, EDX=2, EBX=3, ESP=4, EBP=5, ESI=6, EDI=7).

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
// Each FP register is described by an L (low) and H (high) half so that
// it can hold a double; the H half uses the same encoding with ->next().
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI,   ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
// Relocation formats used when emitting 32-bit immediates/displacements below.
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
// (relies on the L/H halves of a pair being two slots apart in the
// register file declared above).
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Writes the 128-bit pattern {lo, hi} at the first 16-byte aligned address
// at or below 'adr' and returns that aligned address.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// The pools start at slot [1*2] so that rounding down to 16 bytes always
// stays inside fp_signmask_pool; slot 0 is alignment slack.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call for state resets
// (24-bit FPU control word reload and/or vzeroupper).
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All stub fragment; filled in at emission
// time (-1 means "not emitted yet", see the assert below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M (or SIB) byte from its three bit fields.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a condition code into a base opcode.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Embedded oops must be valid (0 and non_oop_word are allowed sentinels).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + ModR/M + SIB addressing [ESP+disp], choosing an 8-bit
// displacement form when disp fits in a signed byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

   // rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModR/M (+ optional SIB) and displacement bytes for a memory
// operand. 'index == 0x4' means no index register; 'base == -1' is the
// special flag for an absolute (disp32-only) address.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {            // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {            // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {            // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {            // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Register-to-register integer move (MOV r32, r/m32); a self-move emits
// nothing.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Post-compare fixup so NaN comparisons report 'less than' (see bit table
// below); only runs when PF signals an unordered result.
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst':
// -1 for less-than or unordered (NaN), 0 for equal, 1 for greater-than.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Debug printout of the prolog; must mirror what MachPrologNode::emit
// (via verified_entry) produces.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


// Emits the method prolog (frame setup, stack bang, FPU mode) via
// MacroAssembler::verified_entry, then records frame completion and
// fixes the constant-table base offset.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Debug printout of the epilog; must mirror MachEpilogNode::emit.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

// Emits the method epilog: optional vzeroupper/FPU-control restore, frame
// teardown (ADD ESP / POP EBP), reserved-stack check and return poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc);  // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register pollReg = as_Register(EBX_enc);
    MacroAssembler masm(&cbuf);
    masm.get_thread(pollReg);
    masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
    masm.relocate(relocInfo::poll_return_type);
    masm.testl(rax, Address(pollReg, 0));
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//=============================================================================

// Register classes used by the spill-copy helpers below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
// Classify an allocator register as integer, x87 float, XMM, or stack slot.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Shared emit/print/size helper for register<->stack moves.
// With cbuf != NULL it emits code; with cbuf == NULL and !do_size it prints
// assembly to 'st'; in all cases it returns the accumulated byte size.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else {                      // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
// XMM<->stack move; reg_lo+1 == reg_hi means a 64-bit (double) move,
// otherwise a 32-bit (float) move.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                              Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                              offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                              Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                              offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register move (float or double, depending on the hi/lo pairing).
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// 32-bit GPR -> XMM move (movdl).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// 32-bit XMM -> GPR move (movdl).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// GPR-to-GPR move (MOV r32, r/m32); always 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/; 926 const char *op_str; 927 int op; 928 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store? 929 op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D "; 930 op = 0xDD; 931 } else { // 32-bit store 932 op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S "; 933 op = 0xD9; 934 assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" ); 935 } 936 937 return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st); 938 } 939 940 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. 941 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 942 int src_hi, int dst_hi, uint ireg, outputStream* st); 943 944 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 945 int stack_offset, int reg, uint ireg, outputStream* st); 946 947 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 948 int dst_offset, uint ireg, outputStream* st) { 949 int calc_size = 0; 950 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 951 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 952 switch (ireg) { 953 case Op_VecS: 954 calc_size = 3+src_offset_size + 3+dst_offset_size; 955 break; 956 case Op_VecD: { 957 calc_size = 3+src_offset_size + 3+dst_offset_size; 958 int tmp_src_offset = src_offset + 4; 959 int tmp_dst_offset = dst_offset + 4; 960 src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4); 961 dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 
1 : 4); 962 calc_size += 3+src_offset_size + 3+dst_offset_size; 963 break; 964 } 965 case Op_VecX: 966 case Op_VecY: 967 case Op_VecZ: 968 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 969 break; 970 default: 971 ShouldNotReachHere(); 972 } 973 if (cbuf) { 974 MacroAssembler _masm(cbuf); 975 int offset = __ offset(); 976 switch (ireg) { 977 case Op_VecS: 978 __ pushl(Address(rsp, src_offset)); 979 __ popl (Address(rsp, dst_offset)); 980 break; 981 case Op_VecD: 982 __ pushl(Address(rsp, src_offset)); 983 __ popl (Address(rsp, dst_offset)); 984 __ pushl(Address(rsp, src_offset+4)); 985 __ popl (Address(rsp, dst_offset+4)); 986 break; 987 case Op_VecX: 988 __ movdqu(Address(rsp, -16), xmm0); 989 __ movdqu(xmm0, Address(rsp, src_offset)); 990 __ movdqu(Address(rsp, dst_offset), xmm0); 991 __ movdqu(xmm0, Address(rsp, -16)); 992 break; 993 case Op_VecY: 994 __ vmovdqu(Address(rsp, -32), xmm0); 995 __ vmovdqu(xmm0, Address(rsp, src_offset)); 996 __ vmovdqu(Address(rsp, dst_offset), xmm0); 997 __ vmovdqu(xmm0, Address(rsp, -32)); 998 break; 999 case Op_VecZ: 1000 __ evmovdquq(Address(rsp, -64), xmm0, 2); 1001 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 1002 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 1003 __ evmovdquq(xmm0, Address(rsp, -64), 2); 1004 break; 1005 default: 1006 ShouldNotReachHere(); 1007 } 1008 int size = __ offset() - offset; 1009 assert(size == calc_size, "incorrect size calculation"); 1010 return size; 1011 #ifndef PRODUCT 1012 } else if (!do_size) { 1013 switch (ireg) { 1014 case Op_VecS: 1015 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1016 "popl [rsp + #%d]", 1017 src_offset, dst_offset); 1018 break; 1019 case Op_VecD: 1020 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1021 "popq [rsp + #%d]\n\t" 1022 "pushl [rsp + #%d]\n\t" 1023 "popq [rsp + #%d]", 1024 src_offset, dst_offset, src_offset+4, dst_offset+4); 1025 break; 1026 case Op_VecX: 1027 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem 
spill\n\t" 1028 "movdqu xmm0, [rsp + #%d]\n\t" 1029 "movdqu [rsp + #%d], xmm0\n\t" 1030 "movdqu xmm0, [rsp - #16]", 1031 src_offset, dst_offset); 1032 break; 1033 case Op_VecY: 1034 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1035 "vmovdqu xmm0, [rsp + #%d]\n\t" 1036 "vmovdqu [rsp + #%d], xmm0\n\t" 1037 "vmovdqu xmm0, [rsp - #32]", 1038 src_offset, dst_offset); 1039 break; 1040 case Op_VecZ: 1041 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1042 "vmovdqu xmm0, [rsp + #%d]\n\t" 1043 "vmovdqu [rsp + #%d], xmm0\n\t" 1044 "vmovdqu xmm0, [rsp - #64]", 1045 src_offset, dst_offset); 1046 break; 1047 default: 1048 ShouldNotReachHere(); 1049 } 1050 #endif 1051 } 1052 return calc_size; 1053 } 1054 1055 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1056 // Get registers to move 1057 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1058 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1059 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1060 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1061 1062 enum RC src_second_rc = rc_class(src_second); 1063 enum RC src_first_rc = rc_class(src_first); 1064 enum RC dst_second_rc = rc_class(dst_second); 1065 enum RC dst_first_rc = rc_class(dst_first); 1066 1067 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1068 1069 // Generate spill code! 
1070 int size = 0; 1071 1072 if( src_first == dst_first && src_second == dst_second ) 1073 return size; // Self copy, no move 1074 1075 if (bottom_type()->isa_vect() != NULL) { 1076 uint ireg = ideal_reg(); 1077 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1078 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1079 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1080 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1081 // mem -> mem 1082 int src_offset = ra_->reg2offset(src_first); 1083 int dst_offset = ra_->reg2offset(dst_first); 1084 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1085 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1086 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1087 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1088 int stack_offset = ra_->reg2offset(dst_first); 1089 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1090 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1091 int stack_offset = ra_->reg2offset(src_first); 1092 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1093 } else { 1094 ShouldNotReachHere(); 1095 } 1096 } 1097 1098 // -------------------------------------- 1099 // Check for mem-mem move. push/pop to move. 
1100 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1101 if( src_second == dst_first ) { // overlapping stack copy ranges 1102 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1103 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1104 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1105 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1106 } 1107 // move low bits 1108 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1109 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1110 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1111 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1112 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1113 } 1114 return size; 1115 } 1116 1117 // -------------------------------------- 1118 // Check for integer reg-reg copy 1119 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1120 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1121 1122 // Check for integer store 1123 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1124 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1125 1126 // Check for integer load 1127 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1128 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1129 1130 // Check for integer reg-xmm reg copy 1131 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1132 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1133 "no 64 bit integer-float reg moves" ); 1134 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1135 } 1136 // -------------------------------------- 1137 // Check for float reg-reg copy 1138 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1139 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1140 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1141 if( cbuf ) { 1142 1143 // Note the mucking with the register encode to compensate for the 0/1 1144 // indexing issue mentioned in a comment in the reg_def sections 1145 // for FPR registers many lines above here. 1146 1147 if( src_first != FPR1L_num ) { 1148 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1149 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1150 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1151 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1152 } else { 1153 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1154 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1155 } 1156 #ifndef PRODUCT 1157 } else if( !do_size ) { 1158 if( size != 0 ) st->print("\n\t"); 1159 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1160 else st->print( "FST %s", Matcher::regName[dst_first]); 1161 #endif 1162 } 1163 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1164 } 1165 1166 // Check for float store 1167 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1168 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1169 } 1170 1171 // Check for float load 1172 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1173 int offset = ra_->reg2offset(src_first); 1174 const char *op_str; 1175 int op; 1176 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1177 op_str = "FLD_D"; 1178 op = 0xDD; 1179 } else { // 32-bit load 1180 op_str = "FLD_S"; 1181 op = 0xD9; 1182 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1183 } 1184 if( cbuf ) { 1185 emit_opcode (*cbuf, op ); 1186 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1187 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1188 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1189 #ifndef PRODUCT 1190 } else if( !do_size ) { 1191 if( size != 0 ) st->print("\n\t"); 1192 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1193 #endif 1194 } 1195 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1196 return size + 3+offset_size+2; 1197 } 1198 1199 // Check for xmm reg-reg copy 1200 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1201 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1202 (src_first+1 == src_second && dst_first+1 == dst_second), 1203 "no non-adjacent float-moves" ); 1204 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1205 } 1206 1207 // Check for xmm reg-integer reg copy 1208 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1209 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1210 "no 64 bit float-integer reg moves" ); 1211 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1212 } 1213 1214 // Check for xmm store 1215 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1216 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1217 } 1218 1219 // Check for float xmm load 1220 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1221 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1222 } 1223 1224 // Copy from float reg to xmm reg 1225 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1226 
// copy to the top of stack from floating point reg 1227 // and use LEA to preserve flags 1228 if( cbuf ) { 1229 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1230 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1231 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1232 emit_d8(*cbuf,0xF8); 1233 #ifndef PRODUCT 1234 } else if( !do_size ) { 1235 if( size != 0 ) st->print("\n\t"); 1236 st->print("LEA ESP,[ESP-8]"); 1237 #endif 1238 } 1239 size += 4; 1240 1241 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1242 1243 // Copy from the temp memory to the xmm reg. 1244 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1245 1246 if( cbuf ) { 1247 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1248 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1249 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1250 emit_d8(*cbuf,0x08); 1251 #ifndef PRODUCT 1252 } else if( !do_size ) { 1253 if( size != 0 ) st->print("\n\t"); 1254 st->print("LEA ESP,[ESP+8]"); 1255 #endif 1256 } 1257 size += 4; 1258 return size; 1259 } 1260 1261 assert( size > 0, "missed a case" ); 1262 1263 // -------------------------------------------------------------------- 1264 // Check for second bits still needing moving. 
1265 if( src_second == dst_second ) 1266 return size; // Self copy; no move 1267 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1268 1269 // Check for second word int-int move 1270 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1271 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1272 1273 // Check for second word integer store 1274 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1275 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1276 1277 // Check for second word integer load 1278 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1279 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1280 1281 1282 Unimplemented(); 1283 return 0; // Mute compiler 1284 } 1285 1286 #ifndef PRODUCT 1287 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1288 implementation( NULL, ra_, false, st ); 1289 } 1290 #endif 1291 1292 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1293 implementation( &cbuf, ra_, false, NULL ); 1294 } 1295 1296 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1297 return MachNode::size(ra_); 1298 } 1299 1300 1301 //============================================================================= 1302 #ifndef PRODUCT 1303 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1304 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1305 int reg = ra_->get_reg_first(this); 1306 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1307 } 1308 #endif 1309 1310 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1311 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1312 int reg = ra_->get_encode(this); 1313 if( offset >= 128 ) { 1314 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1315 emit_rm(cbuf, 0x2, reg, 0x04); 1316 
emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1317 emit_d32(cbuf, offset); 1318 } 1319 else { 1320 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1321 emit_rm(cbuf, 0x1, reg, 0x04); 1322 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1323 emit_d8(cbuf, offset); 1324 } 1325 } 1326 1327 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1328 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1329 if( offset >= 128 ) { 1330 return 7; 1331 } 1332 else { 1333 return 4; 1334 } 1335 } 1336 1337 //============================================================================= 1338 #ifndef PRODUCT 1339 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1340 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1341 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1342 st->print_cr("\tNOP"); 1343 st->print_cr("\tNOP"); 1344 if( !OptoBreakpoint ) 1345 st->print_cr("\tNOP"); 1346 } 1347 #endif 1348 1349 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1350 MacroAssembler masm(&cbuf); 1351 #ifdef ASSERT 1352 uint insts_size = cbuf.insts_size(); 1353 #endif 1354 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1355 masm.jump_cc(Assembler::notEqual, 1356 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1357 /* WARNING these NOPs are critical so that verified entry point is properly 1358 aligned for patching by NativeJump::patch_verified_entry() */ 1359 int nops_cnt = 2; 1360 if( !OptoBreakpoint ) // Leave space for int3 1361 nops_cnt += 1; 1362 masm.nop(nops_cnt); 1363 1364 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1365 } 1366 1367 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1368 return OptoBreakpoint ? 
11 : 12; 1369 } 1370 1371 1372 //============================================================================= 1373 1374 int Matcher::regnum_to_fpu_offset(int regnum) { 1375 return regnum - 32; // The FP registers are in the second chunk 1376 } 1377 1378 // This is UltraSparc specific, true just means we have fast l2f conversion 1379 const bool Matcher::convL2FSupported(void) { 1380 return true; 1381 } 1382 1383 // Is this branch offset short enough that a short branch can be used? 1384 // 1385 // NOTE: If the platform does not provide any short branch variants, then 1386 // this method should return false for offset 0. 1387 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1388 // The passed offset is relative to address of the branch. 1389 // On 86 a branch displacement is calculated relative to address 1390 // of a next instruction. 1391 offset -= br_size; 1392 1393 // the short version of jmpConUCF2 contains multiple branches, 1394 // making the reach slightly less 1395 if (rule == jmpConUCF2_rule) 1396 return (-126 <= offset && offset <= 125); 1397 return (-128 <= offset && offset <= 127); 1398 } 1399 1400 const bool Matcher::isSimpleConstant64(jlong value) { 1401 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1402 return false; 1403 } 1404 1405 // The ecx parameter to rep stos for the ClearArray node is in dwords. 1406 const bool Matcher::init_array_count_is_in_bytes = false; 1407 1408 // Needs 2 CMOV's for longs. 1409 const int Matcher::long_cmove_cost() { return 1; } 1410 1411 // No CMOVF/CMOVD with SSE/SSE2 1412 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1413 1414 // Does the CPU require late expand (see block.cpp for description of late expand)? 1415 const bool Matcher::require_postalloc_expand = false; 1416 1417 // Do we need to mask the count passed to shift instructions or does 1418 // the cpu only look at the lower 5/6 bits anyway? 
// x86 shift instructions themselves use only the low 5/6 bits of the count.
const bool Matcher::need_masked_shift_count = false;

// Narrow oops/klasses (compressed pointers) do not exist on 32-bit x86,
// so none of these queries should ever be reached here.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_oop_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_klass_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Rewrite the idx-th leaf input's memory operand of 'node' to an equivalent
// "*_win95_safe" operand variant where required, leaving safe addressing
// modes untouched.  NOTE(review): the _win95_safe variants appear to avoid
// EBP-based addressing (see the LOAD_LONG comment below and the operand
// definitions elsewhere in this file) — confirm against those definitions.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped = node->oper_input_base();   // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1;                           // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Walk the operands until we find the one that owns input edge 'idx'.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                                // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  // XMM argument registers require at least SSE1 (floats) / SSE2 (doubles,
  // which also occupy the second halves XMM0b/XMM1b).
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL (no divmodL node on 32-bit x86).
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// Recognizes (AndL x, con) with a 32-bit mask constant, and 32-bit ConL.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams. Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword. There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
// operand to generate a function which returns its register number when
// queried. CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
// MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried. COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding. They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode. Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block. Emit functions will live in the
  // main source block for now. In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix.
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // ModRM byte with mod=11 (register-to-register form).
  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV dst,0 using the B8+rd immediate form (does not touch flags).
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    // normal case special case
    //
    // input : rax,: dividend min_int
    //         reg: divisor -1
    //
    // output: rax,: quotient (= rax, idiv reg) min_int
    //         rdx: remainder (= rax, irem reg) 0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80 cmp rax,80000000h
    // 0F 85 0B 00 00 00 jne normal_case
    // 33 D2 xor rdx,edx
    // 83 F9 FF cmp rcx,0FFh
    // 0F 84 03 00 00 00 je done
    // normal_case:
    // 99 cdq
    // F7 F9 idiv rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
    // normal_case:
    emit_opcode(cbuf,0x99); // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1704 // Check for 8-bit immediate, and set sign extend bit in opcode 1705 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1706 emit_opcode(cbuf, $primary | 0x02); } 1707 else { // If 32-bit immediate 1708 emit_opcode(cbuf, $primary); 1709 } 1710 // Emit r/m byte with secondary opcode, after primary opcode. 1711 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1712 %} 1713 1714 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1715 // Check for 8-bit immediate, and set sign extend bit in opcode 1716 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1717 $$$emit8$imm$$constant; 1718 } 1719 else { // If 32-bit immediate 1720 // Output immediate 1721 $$$emit32$imm$$constant; 1722 } 1723 %} 1724 1725 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1726 // Emit primary opcode and set sign-extend bit 1727 // Check for 8-bit immediate, and set sign extend bit in opcode 1728 int con = (int)$imm$$constant; // Throw away top bits 1729 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1730 // Emit r/m byte with secondary opcode, after primary opcode. 1731 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1732 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1733 else emit_d32(cbuf,con); 1734 %} 1735 1736 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1737 // Emit primary opcode and set sign-extend bit 1738 // Check for 8-bit immediate, and set sign extend bit in opcode 1739 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1740 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1741 // Emit r/m byte with tertiary opcode, after primary opcode. 
1742 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1743 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1744 else emit_d32(cbuf,con); 1745 %} 1746 1747 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1748 emit_cc(cbuf, $secondary, $dst$$reg ); 1749 %} 1750 1751 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1752 int destlo = $dst$$reg; 1753 int desthi = HIGH_FROM_LOW(destlo); 1754 // bswap lo 1755 emit_opcode(cbuf, 0x0F); 1756 emit_cc(cbuf, 0xC8, destlo); 1757 // bswap hi 1758 emit_opcode(cbuf, 0x0F); 1759 emit_cc(cbuf, 0xC8, desthi); 1760 // xchg lo and hi 1761 emit_opcode(cbuf, 0x87); 1762 emit_rm(cbuf, 0x3, destlo, desthi); 1763 %} 1764 1765 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1766 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1767 %} 1768 1769 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1770 $$$emit8$primary; 1771 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1772 %} 1773 1774 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1775 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1776 emit_d8(cbuf, op >> 8 ); 1777 emit_d8(cbuf, op & 255); 1778 %} 1779 1780 // emulate a CMOV with a conditional branch around a MOV 1781 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1782 // Invert sense of branch from sense of CMOV 1783 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1784 emit_d8( cbuf, $brOffs$$constant ); 1785 %} 1786 1787 enc_class enc_PartialSubtypeCheck( ) %{ 1788 Register Redi = as_Register(EDI_enc); // result register 1789 Register Reax = as_Register(EAX_enc); // super class 1790 Register Recx = as_Register(ECX_enc); // killed 1791 Register Resi = as_Register(ESI_enc); // sub class 1792 Label miss; 1793 1794 MacroAssembler _masm(&cbuf); 1795 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1796 NULL, &miss, 1797 /*set_cond_codes:*/ true); 1798 if ($primary) { 1799 __ xorptr(Redi, Redi); 1800 } 1801 __ bind(miss); 1802 %} 1803 1804 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1805 
MacroAssembler masm(&cbuf); 1806 int start = masm.offset(); 1807 if (UseSSE >= 2) { 1808 if (VerifyFPU) { 1809 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1810 } 1811 } else { 1812 // External c_calling_convention expects the FPU stack to be 'clean'. 1813 // Compiled code leaves it dirty. Do cleanup now. 1814 masm.empty_FPU_stack(); 1815 } 1816 if (sizeof_FFree_Float_Stack_All == -1) { 1817 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1818 } else { 1819 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1820 } 1821 %} 1822 1823 enc_class Verify_FPU_For_Leaf %{ 1824 if( VerifyFPU ) { 1825 MacroAssembler masm(&cbuf); 1826 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1827 } 1828 %} 1829 1830 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1831 // This is the instruction starting address for relocation info. 1832 cbuf.set_insts_mark(); 1833 $$$emit8$primary; 1834 // CALL directly to the runtime 1835 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1836 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1837 1838 if (UseSSE >= 2) { 1839 MacroAssembler _masm(&cbuf); 1840 BasicType rt = tf()->return_type(); 1841 1842 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1843 // A C runtime call where the return value is unused. In SSE2+ 1844 // mode the result needs to be removed from the FPU stack. It's 1845 // likely that this function call could be removed by the 1846 // optimizer if the C function is a pure function. 
1847 __ ffree(0); 1848 } else if (rt == T_FLOAT) { 1849 __ lea(rsp, Address(rsp, -4)); 1850 __ fstp_s(Address(rsp, 0)); 1851 __ movflt(xmm0, Address(rsp, 0)); 1852 __ lea(rsp, Address(rsp, 4)); 1853 } else if (rt == T_DOUBLE) { 1854 __ lea(rsp, Address(rsp, -8)); 1855 __ fstp_d(Address(rsp, 0)); 1856 __ movdbl(xmm0, Address(rsp, 0)); 1857 __ lea(rsp, Address(rsp, 8)); 1858 } 1859 } 1860 %} 1861 1862 enc_class pre_call_resets %{ 1863 // If method sets FPU control word restore it here 1864 debug_only(int off0 = cbuf.insts_size()); 1865 if (ra_->C->in_24_bit_fp_mode()) { 1866 MacroAssembler _masm(&cbuf); 1867 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1868 } 1869 // Clear upper bits of YMM registers when current compiled code uses 1870 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1871 MacroAssembler _masm(&cbuf); 1872 __ vzeroupper(); 1873 debug_only(int off1 = cbuf.insts_size()); 1874 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1875 %} 1876 1877 enc_class post_call_FPU %{ 1878 // If method sets FPU control word do it here also 1879 if (Compile::current()->in_24_bit_fp_mode()) { 1880 MacroAssembler masm(&cbuf); 1881 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1882 } 1883 %} 1884 1885 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1886 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1887 // who we intended to call. 1888 cbuf.set_insts_mark(); 1889 $$$emit8$primary; 1890 1891 if (!_method) { 1892 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1893 runtime_call_Relocation::spec(), 1894 RELOC_IMM32); 1895 } else { 1896 int method_index = resolved_method_index(cbuf); 1897 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1898 : static_call_Relocation::spec(method_index); 1899 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1900 rspec, RELOC_DISP32); 1901 // Emit stubs for static call. 1902 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1903 if (stub == NULL) { 1904 ciEnv::current()->record_failure("CodeCache is full"); 1905 return; 1906 } 1907 } 1908 %} 1909 1910 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1911 MacroAssembler _masm(&cbuf); 1912 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1913 %} 1914 1915 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1916 int disp = in_bytes(Method::from_compiled_offset()); 1917 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1918 1919 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1920 cbuf.set_insts_mark(); 1921 $$$emit8$primary; 1922 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1923 emit_d8(cbuf, disp); // Displacement 1924 1925 %} 1926 1927 // Following encoding is no longer used, but may be restored if calling 1928 // convention changes significantly. 
1929 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1930 // 1931 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1932 // // int ic_reg = Matcher::inline_cache_reg(); 1933 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1934 // // int imo_reg = Matcher::interpreter_method_oop_reg(); 1935 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1936 // 1937 // // // Interpreter expects method_oop in EBX, currently a callee-saved register, 1938 // // // so we load it immediately before the call 1939 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop 1940 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1941 // 1942 // // xor rbp,ebp 1943 // emit_opcode(cbuf, 0x33); 1944 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1945 // 1946 // // CALL to interpreter. 1947 // cbuf.set_insts_mark(); 1948 // $$$emit8$primary; 1949 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1950 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1951 // %} 1952 1953 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1954 $$$emit8$primary; 1955 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1956 $$$emit8$shift$$constant; 1957 %} 1958 1959 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1960 // Load immediate does not have a zero or sign extended version 1961 // for 8-bit immediates 1962 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1963 $$$emit32$src$$constant; 1964 %} 1965 1966 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1967 // Load immediate does not have a zero or sign extended version 1968 // for 8-bit immediates 1969 emit_opcode(cbuf, $primary + $dst$$reg); 1970 $$$emit32$src$$constant; 1971 %} 1972 1973 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1974 // Load immediate does not have a zero or sign extended version 1975 // for 8-bit immediates 1976 int dst_enc = $dst$$reg; 1977 int src_con = $src$$constant & 0x0FFFFFFFFL; 1978 if (src_con == 0) { 1979 // xor dst, dst 
1980 emit_opcode(cbuf, 0x33); 1981 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1982 } else { 1983 emit_opcode(cbuf, $primary + dst_enc); 1984 emit_d32(cbuf, src_con); 1985 } 1986 %} 1987 1988 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 1989 // Load immediate does not have a zero or sign extended version 1990 // for 8-bit immediates 1991 int dst_enc = $dst$$reg + 2; 1992 int src_con = ((julong)($src$$constant)) >> 32; 1993 if (src_con == 0) { 1994 // xor dst, dst 1995 emit_opcode(cbuf, 0x33); 1996 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1997 } else { 1998 emit_opcode(cbuf, $primary + dst_enc); 1999 emit_d32(cbuf, src_con); 2000 } 2001 %} 2002 2003 2004 // Encode a reg-reg copy. If it is useless, then empty encoding. 2005 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 2006 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2007 %} 2008 2009 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 2010 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2011 %} 2012 2013 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 2014 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2015 %} 2016 2017 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 2018 $$$emit8$primary; 2019 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2020 %} 2021 2022 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 2023 $$$emit8$secondary; 2024 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2025 %} 2026 2027 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 2028 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2029 %} 2030 2031 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 2032 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2033 %} 2034 2035 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 2036 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); 2037 %} 2038 2039 enc_class Con32 (immI src) %{ // Con32(storeImmI) 2040 // Output immediate 2041 $$$emit32$src$$constant; 2042 %} 2043 2044 enc_class Con32FPR_as_bits(immFPR src) 
%{ // storeF_imm 2045 // Output Float immediate bits 2046 jfloat jf = $src$$constant; 2047 int jf_as_bits = jint_cast( jf ); 2048 emit_d32(cbuf, jf_as_bits); 2049 %} 2050 2051 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 2052 // Output Float immediate bits 2053 jfloat jf = $src$$constant; 2054 int jf_as_bits = jint_cast( jf ); 2055 emit_d32(cbuf, jf_as_bits); 2056 %} 2057 2058 enc_class Con16 (immI src) %{ // Con16(storeImmI) 2059 // Output immediate 2060 $$$emit16$src$$constant; 2061 %} 2062 2063 enc_class Con_d32(immI src) %{ 2064 emit_d32(cbuf,$src$$constant); 2065 %} 2066 2067 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2068 // Output immediate memory reference 2069 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2070 emit_d32(cbuf, 0x00); 2071 %} 2072 2073 enc_class lock_prefix( ) %{ 2074 emit_opcode(cbuf,0xF0); // [Lock] 2075 %} 2076 2077 // Cmp-xchg long value. 2078 // Note: we need to swap rbx, and rcx before and after the 2079 // cmpxchg8 instruction because the instruction uses 2080 // rcx as the high order word of the new value to store but 2081 // our register encoding uses rbx,. 
2082 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ 2083 2084 // XCHG rbx,ecx 2085 emit_opcode(cbuf,0x87); 2086 emit_opcode(cbuf,0xD9); 2087 // [Lock] 2088 emit_opcode(cbuf,0xF0); 2089 // CMPXCHG8 [Eptr] 2090 emit_opcode(cbuf,0x0F); 2091 emit_opcode(cbuf,0xC7); 2092 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2093 // XCHG rbx,ecx 2094 emit_opcode(cbuf,0x87); 2095 emit_opcode(cbuf,0xD9); 2096 %} 2097 2098 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ 2099 // [Lock] 2100 emit_opcode(cbuf,0xF0); 2101 2102 // CMPXCHG [Eptr] 2103 emit_opcode(cbuf,0x0F); 2104 emit_opcode(cbuf,0xB1); 2105 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2106 %} 2107 2108 enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{ 2109 // [Lock] 2110 emit_opcode(cbuf,0xF0); 2111 2112 // CMPXCHGB [Eptr] 2113 emit_opcode(cbuf,0x0F); 2114 emit_opcode(cbuf,0xB0); 2115 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2116 %} 2117 2118 enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{ 2119 // [Lock] 2120 emit_opcode(cbuf,0xF0); 2121 2122 // 16-bit mode 2123 emit_opcode(cbuf, 0x66); 2124 2125 // CMPXCHGW [Eptr] 2126 emit_opcode(cbuf,0x0F); 2127 emit_opcode(cbuf,0xB1); 2128 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2129 %} 2130 2131 enc_class enc_flags_ne_to_boolean( iRegI res ) %{ 2132 int res_encoding = $res$$reg; 2133 2134 // MOV res,0 2135 emit_opcode( cbuf, 0xB8 + res_encoding); 2136 emit_d32( cbuf, 0 ); 2137 // JNE,s fail 2138 emit_opcode(cbuf,0x75); 2139 emit_d8(cbuf, 5 ); 2140 // MOV res,1 2141 emit_opcode( cbuf, 0xB8 + res_encoding); 2142 emit_d32( cbuf, 1 ); 2143 // fail: 2144 %} 2145 2146 enc_class set_instruction_start( ) %{ 2147 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2148 %} 2149 2150 enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem 2151 int reg_encoding = $ereg$$reg; 2152 int base = $mem$$base; 2153 int index = $mem$$index; 2154 int scale = $mem$$scale; 2155 int displace = $mem$$disp; 2156 relocInfo::relocType disp_reloc = $mem->disp_reloc(); 2157 encode_RegMem(cbuf, reg_encoding, base, 
index, scale, displace, disp_reloc); 2158 %} 2159 2160 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem 2161 int reg_encoding = HIGH_FROM_LOW($ereg$$reg); // Hi register of pair, computed from lo 2162 int base = $mem$$base; 2163 int index = $mem$$index; 2164 int scale = $mem$$scale; 2165 int displace = $mem$$disp + 4; // Offset is 4 further in memory 2166 assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" ); 2167 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none); 2168 %} 2169 2170 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ 2171 int r1, r2; 2172 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 2173 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 2174 emit_opcode(cbuf,0x0F); 2175 emit_opcode(cbuf,$tertiary); 2176 emit_rm(cbuf, 0x3, r1, r2); 2177 emit_d8(cbuf,$cnt$$constant); 2178 emit_d8(cbuf,$primary); 2179 emit_rm(cbuf, 0x3, $secondary, r1); 2180 emit_d8(cbuf,$cnt$$constant); 2181 %} 2182 2183 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ 2184 emit_opcode( cbuf, 0x8B ); // Move 2185 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2186 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2187 emit_d8(cbuf,$primary); 2188 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 2189 emit_d8(cbuf,$cnt$$constant-32); 2190 } 2191 emit_d8(cbuf,$primary); 2192 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg)); 2193 emit_d8(cbuf,31); 2194 %} 2195 2196 enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{ 2197 int r1, r2; 2198 if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 2199 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 2200 2201 emit_opcode( cbuf, 0x8B ); // Move r1,r2 2202 emit_rm(cbuf, 0x3, r1, r2); 2203 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2204 emit_opcode(cbuf,$primary); 2205 emit_rm(cbuf, 0x3, $secondary, r1); 2206 emit_d8(cbuf,$cnt$$constant-32); 2207 } 2208 
emit_opcode(cbuf,0x33); // XOR r2,r2 2209 emit_rm(cbuf, 0x3, r2, r2); 2210 %} 2211 2212 // Clone of RegMem but accepts an extra parameter to access each 2213 // half of a double in memory; it never needs relocation info. 2214 enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{ 2215 emit_opcode(cbuf,$opcode$$constant); 2216 int reg_encoding = $rm_reg$$reg; 2217 int base = $mem$$base; 2218 int index = $mem$$index; 2219 int scale = $mem$$scale; 2220 int displace = $mem$$disp + $disp_for_half$$constant; 2221 relocInfo::relocType disp_reloc = relocInfo::none; 2222 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2223 %} 2224 2225 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!! 2226 // 2227 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant 2228 // and it never needs relocation information. 2229 // Frequently used to move data between FPU's Stack Top and memory. 
2230 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ 2231 int rm_byte_opcode = $rm_opcode$$constant; 2232 int base = $mem$$base; 2233 int index = $mem$$index; 2234 int scale = $mem$$scale; 2235 int displace = $mem$$disp; 2236 assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" ); 2237 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none); 2238 %} 2239 2240 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ 2241 int rm_byte_opcode = $rm_opcode$$constant; 2242 int base = $mem$$base; 2243 int index = $mem$$index; 2244 int scale = $mem$$scale; 2245 int displace = $mem$$disp; 2246 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2247 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 2248 %} 2249 2250 enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea 2251 int reg_encoding = $dst$$reg; 2252 int base = $src0$$reg; // 0xFFFFFFFF indicates no base 2253 int index = 0x04; // 0x04 indicates no index 2254 int scale = 0x00; // 0x00 indicates no scale 2255 int displace = $src1$$constant; // 0x00 indicates no displacement 2256 relocInfo::relocType disp_reloc = relocInfo::none; 2257 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2258 %} 2259 2260 enc_class min_enc (rRegI dst, rRegI src) %{ // MIN 2261 // Compare dst,src 2262 emit_opcode(cbuf,0x3B); 2263 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2264 // jmp dst < src around move 2265 emit_opcode(cbuf,0x7C); 2266 emit_d8(cbuf,2); 2267 // move dst,src 2268 emit_opcode(cbuf,0x8B); 2269 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2270 %} 2271 2272 enc_class max_enc (rRegI dst, rRegI src) %{ // MAX 2273 // Compare dst,src 2274 emit_opcode(cbuf,0x3B); 2275 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2276 // jmp dst > src around move 2277 emit_opcode(cbuf,0x7F); 2278 emit_d8(cbuf,2); 2279 // move dst,src 2280 emit_opcode(cbuf,0x8B); 
2281 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2282 %} 2283 2284 enc_class enc_FPR_store(memory mem, regDPR src) %{ 2285 // If src is FPR1, we can just FST to store it. 2286 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 2287 int reg_encoding = 0x2; // Just store 2288 int base = $mem$$base; 2289 int index = $mem$$index; 2290 int scale = $mem$$scale; 2291 int displace = $mem$$disp; 2292 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2293 if( $src$$reg != FPR1L_enc ) { 2294 reg_encoding = 0x3; // Store & pop 2295 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it) 2296 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2297 } 2298 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2299 emit_opcode(cbuf,$primary); 2300 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2301 %} 2302 2303 enc_class neg_reg(rRegI dst) %{ 2304 // NEG $dst 2305 emit_opcode(cbuf,0xF7); 2306 emit_rm(cbuf, 0x3, 0x03, $dst$$reg ); 2307 %} 2308 2309 enc_class setLT_reg(eCXRegI dst) %{ 2310 // SETLT $dst 2311 emit_opcode(cbuf,0x0F); 2312 emit_opcode(cbuf,0x9C); 2313 emit_rm( cbuf, 0x3, 0x4, $dst$$reg ); 2314 %} 2315 2316 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT 2317 int tmpReg = $tmp$$reg; 2318 2319 // SUB $p,$q 2320 emit_opcode(cbuf,0x2B); 2321 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg); 2322 // SBB $tmp,$tmp 2323 emit_opcode(cbuf,0x1B); 2324 emit_rm(cbuf, 0x3, tmpReg, tmpReg); 2325 // AND $tmp,$y 2326 emit_opcode(cbuf,0x23); 2327 emit_rm(cbuf, 0x3, tmpReg, $y$$reg); 2328 // ADD $p,$tmp 2329 emit_opcode(cbuf,0x03); 2330 emit_rm(cbuf, 0x3, $p$$reg, tmpReg); 2331 %} 2332 2333 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ 2334 // TEST shift,32 2335 emit_opcode(cbuf,0xF7); 2336 emit_rm(cbuf, 0x3, 0, ECX_enc); 2337 emit_d32(cbuf,0x20); 2338 // JEQ,s small 2339 emit_opcode(cbuf, 0x74); 2340 emit_d8(cbuf, 0x04); 2341 // MOV $dst.hi,$dst.lo 2342 
emit_opcode( cbuf, 0x8B ); 2343 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2344 // CLR $dst.lo 2345 emit_opcode(cbuf, 0x33); 2346 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); 2347 // small: 2348 // SHLD $dst.hi,$dst.lo,$shift 2349 emit_opcode(cbuf,0x0F); 2350 emit_opcode(cbuf,0xA5); 2351 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2352 // SHL $dst.lo,$shift" 2353 emit_opcode(cbuf,0xD3); 2354 emit_rm(cbuf, 0x3, 0x4, $dst$$reg ); 2355 %} 2356 2357 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ 2358 // TEST shift,32 2359 emit_opcode(cbuf,0xF7); 2360 emit_rm(cbuf, 0x3, 0, ECX_enc); 2361 emit_d32(cbuf,0x20); 2362 // JEQ,s small 2363 emit_opcode(cbuf, 0x74); 2364 emit_d8(cbuf, 0x04); 2365 // MOV $dst.lo,$dst.hi 2366 emit_opcode( cbuf, 0x8B ); 2367 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2368 // CLR $dst.hi 2369 emit_opcode(cbuf, 0x33); 2370 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg)); 2371 // small: 2372 // SHRD $dst.lo,$dst.hi,$shift 2373 emit_opcode(cbuf,0x0F); 2374 emit_opcode(cbuf,0xAD); 2375 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2376 // SHR $dst.hi,$shift" 2377 emit_opcode(cbuf,0xD3); 2378 emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) ); 2379 %} 2380 2381 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ 2382 // TEST shift,32 2383 emit_opcode(cbuf,0xF7); 2384 emit_rm(cbuf, 0x3, 0, ECX_enc); 2385 emit_d32(cbuf,0x20); 2386 // JEQ,s small 2387 emit_opcode(cbuf, 0x74); 2388 emit_d8(cbuf, 0x05); 2389 // MOV $dst.lo,$dst.hi 2390 emit_opcode( cbuf, 0x8B ); 2391 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2392 // SAR $dst.hi,31 2393 emit_opcode(cbuf, 0xC1); 2394 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) ); 2395 emit_d8(cbuf, 0x1F ); 2396 // small: 2397 // SHRD $dst.lo,$dst.hi,$shift 2398 emit_opcode(cbuf,0x0F); 2399 emit_opcode(cbuf,0xAD); 2400 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2401 // SAR $dst.hi,$shift" 2402 
emit_opcode(cbuf,0xD3); 2403 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) ); 2404 %} 2405 2406 2407 // ----------------- Encodings for floating point unit ----------------- 2408 // May leave result in FPU-TOS or FPU reg depending on opcodes 2409 enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV 2410 $$$emit8$primary; 2411 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 2412 %} 2413 2414 // Pop argument in FPR0 with FSTP ST(0) 2415 enc_class PopFPU() %{ 2416 emit_opcode( cbuf, 0xDD ); 2417 emit_d8( cbuf, 0xD8 ); 2418 %} 2419 2420 // !!!!! equivalent to Pop_Reg_F 2421 enc_class Pop_Reg_DPR( regDPR dst ) %{ 2422 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2423 emit_d8( cbuf, 0xD8+$dst$$reg ); 2424 %} 2425 2426 enc_class Push_Reg_DPR( regDPR dst ) %{ 2427 emit_opcode( cbuf, 0xD9 ); 2428 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2429 %} 2430 2431 enc_class strictfp_bias1( regDPR dst ) %{ 2432 emit_opcode( cbuf, 0xDB ); // FLD m80real 2433 emit_opcode( cbuf, 0x2D ); 2434 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); 2435 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2436 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2437 %} 2438 2439 enc_class strictfp_bias2( regDPR dst ) %{ 2440 emit_opcode( cbuf, 0xDB ); // FLD m80real 2441 emit_opcode( cbuf, 0x2D ); 2442 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); 2443 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2444 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2445 %} 2446 2447 // Special case for moving an integer register to a stack slot. 2448 enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2449 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp ); 2450 %} 2451 2452 // Special case for moving a register to a stack slot. 
2453 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2454 // Opcode already emitted 2455 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2456 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2457 emit_d32(cbuf, $dst$$disp); // Displacement 2458 %} 2459 2460 // Push the integer in stackSlot 'src' onto FP-stack 2461 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2462 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2463 %} 2464 2465 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2466 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2467 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2468 %} 2469 2470 // Same as Pop_Mem_F except for opcode 2471 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2472 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2473 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2474 %} 2475 2476 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2477 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2478 emit_d8( cbuf, 0xD8+$dst$$reg ); 2479 %} 2480 2481 enc_class Push_Reg_FPR( regFPR dst ) %{ 2482 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2483 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2484 %} 2485 2486 // Push FPU's float to a stack-slot, and pop FPU-stack 2487 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2488 int pop = 0x02; 2489 if ($src$$reg != FPR1L_enc) { 2490 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2491 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2492 pop = 0x03; 2493 } 2494 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2495 %} 2496 2497 // Push FPU's double to a stack-slot, and pop FPU-stack 2498 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2499 int pop = 0x02; 2500 if ($src$$reg != FPR1L_enc) { 2501 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2502 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2503 pop = 0x03; 2504 } 2505 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2506 %} 2507 2508 // Push FPU's double to 
a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Spill both XMM doubles through the stack and push them onto the
  // FPU stack (src1 pushed first, so src0 ends up on top).
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Float variant of Push_ModD_encoding: src1 pushed first, src0 on top.
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the FPU result through the stack temp into an XMM register.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Pop the FPU result through the stack temp into an XMM register;
  // d8 is the number of bytes of stack temp to release.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Assumes a stack temp qword has already been allocated (see push_stack_temp_qword).
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  //  fnstsw_ax();
  //  sahf();
  //  movl(dst, nan_result);
  //  jcc(Assembler::parity, exit);
  //  movl(dst, less_result);
  //  jcc(Assembler::below, exit);
  //  movl(dst, equal_result);
  //  jcc(Assembler::equal, exit);
  //  movl(dst, greater_result);

  //  less_result     =  1;
  //  greater_result  = -1;
  //  equal_result    =  0;
  //  nan_result      = -1;

  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8  ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8  ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8  ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL   EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR    EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}

  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}

  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}

  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB   $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}

  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}

  // Sniff, sniff... smells like Gnu Superoptimizer
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}

  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);       // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);       // SUB    ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);       // FISTP  [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);       // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP    EAX
    emit_opcode(cbuf,0x3D);       // CMP    EAX,imm
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE    around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);       // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);       // SUB    ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);       // FISTP  [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);       // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP    EAX
    emit_opcode(cbuf,0x5A);       // POP    EDX
    emit_opcode(cbuf,0x81);       // CMP    EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE    around_slow_call
    emit_d8    (cbuf,0x07+4);     // Size of slow_call
    emit_opcode(cbuf,0x85);       // TEST   EAX,EAX
    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
    emit_opcode(cbuf,0x75);       // JNE    around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADDP  ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
      // FSUB   ST,$src1
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xE0 + $src1$$reg);

      // FDIV
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//        |        |        |  3
//        |        +--------+
//        V        |  old   |      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF    +--------+
//        |      |  pad2  |  2   pad to align old SP
//        |      +--------+  1
//        |      | locks  |  0
//        |      +--------+----> OptoReg::stack0(), even aligned
//        |      |  pad1  | 11   pad to align new SP
//        |      +--------+
//        |      |        | 10
//        |      | spills |  9   spills
//        V      |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^      |  out   |  7
//        |      |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by    +--------+
//    CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |  new |preserve|      Must be even-aligned.
//        |   SP-+--------+----> Matcher::_new_SP, even aligned
//        |      |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value fits in a signed 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

operand ncxRegI() %{
constraint(ALLOC_IN_RC(ncx_reg)); 3778 match(RegI); 3779 match(eAXRegI); 3780 match(eDXRegI); 3781 match(eSIRegI); 3782 match(eDIRegI); 3783 3784 format %{ %} 3785 interface(REG_INTER); 3786 %} 3787 3788 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3789 // // 3790 operand eSIRegI(xRegI reg) %{ 3791 constraint(ALLOC_IN_RC(esi_reg)); 3792 match(reg); 3793 match(rRegI); 3794 3795 format %{ "ESI" %} 3796 interface(REG_INTER); 3797 %} 3798 3799 // Pointer Register 3800 operand anyRegP() %{ 3801 constraint(ALLOC_IN_RC(any_reg)); 3802 match(RegP); 3803 match(eAXRegP); 3804 match(eBXRegP); 3805 match(eCXRegP); 3806 match(eDIRegP); 3807 match(eRegP); 3808 3809 format %{ %} 3810 interface(REG_INTER); 3811 %} 3812 3813 operand eRegP() %{ 3814 constraint(ALLOC_IN_RC(int_reg)); 3815 match(RegP); 3816 match(eAXRegP); 3817 match(eBXRegP); 3818 match(eCXRegP); 3819 match(eDIRegP); 3820 3821 format %{ %} 3822 interface(REG_INTER); 3823 %} 3824 3825 // On windows95, EBP is not safe to use for implicit null tests. 
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  // costed higher than eRegP so it is only chosen where EBP must be avoided
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  // predicate(false): never selected by the matcher on its own;
  // instructions name this operand explicitly where they want it
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands (x87 double stack registers)
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands (x87 float stack registers)
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER encodings below, index(0x4) means "no index register"
// and base(0xFFFFFFFF) means "no base register" (absolute address).

// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// (note the commuted match: pointer constant is the AddP base here)
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
// (comment previously said "Plus Offset" by copy-paste; there is no offset)
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" with out a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use it's address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// These mirror the plain memory operands above, but exclude EBP as a base
// (see the eRegP_no_EBP comment) and carry op_cost(100) so they lose to the
// cheaper general forms whenever the general forms are legal.

// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparision Code
// The hex values are the x86 condition-code nibbles used in Jcc/SETcc/CMOVcc
// encodings (signed forms: l/ge/le/g).
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
// Encodings are those of the reversed (commuted) comparison: less uses "g",
// greater uses "l", etc.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares
// Unsigned variant of cmpOp_commute: encodings are the reversed unsigned
// conditions (b/nb/be/nbe).
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.
4619 pipeline %{ 4620 4621 //----------ATTRIBUTES--------------------------------------------------------- 4622 attributes %{ 4623 variable_size_instructions; // Fixed size instructions 4624 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle 4625 instruction_unit_size = 1; // An instruction is 1 bytes long 4626 instruction_fetch_unit_size = 16; // The processor fetches one line 4627 instruction_fetch_units = 1; // of 16 bytes 4628 4629 // List of nop instructions 4630 nops( MachNop ); 4631 %} 4632 4633 //----------RESOURCES---------------------------------------------------------- 4634 // Resources are the functional units available to the machine 4635 4636 // Generic P2/P3 pipeline 4637 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of 4638 // 3 instructions decoded per cycle. 4639 // 2 load/store ops per cycle, 1 branch, 1 FPU, 4640 // 2 ALU op, only ALU0 handles mul/div instructions. 4641 resources( D0, D1, D2, DECODE = D0 | D1 | D2, 4642 MS0, MS1, MEM = MS0 | MS1, 4643 BR, FPU, 4644 ALU0, ALU1, ALU = ALU0 | ALU1 ); 4645 4646 //----------PIPELINE DESCRIPTION----------------------------------------------- 4647 // Pipeline Description specifies the stages in the machine's pipeline 4648 4649 // Generic P2/P3 pipeline 4650 pipe_desc(S0, S1, S2, S3, S4, S5); 4651 4652 //----------PIPELINE CLASSES--------------------------------------------------- 4653 // Pipeline Classes describe the stages in which input and output are 4654 // referenced by the hardware pipeline. 4655 4656 // Naming convention: ialu or fpu 4657 // Then: _reg 4658 // Then: _reg if there is a 2nd register 4659 // Then: _long if it's a pair of instructions implementing a long 4660 // Then: _fat if it requires the big decoder 4661 // Or: _mem if it requires the big decoder and a memory unit. 
4662 4663 // Integer ALU reg operation 4664 pipe_class ialu_reg(rRegI dst) %{ 4665 single_instruction; 4666 dst : S4(write); 4667 dst : S3(read); 4668 DECODE : S0; // any decoder 4669 ALU : S3; // any alu 4670 %} 4671 4672 // Long ALU reg operation 4673 pipe_class ialu_reg_long(eRegL dst) %{ 4674 instruction_count(2); 4675 dst : S4(write); 4676 dst : S3(read); 4677 DECODE : S0(2); // any 2 decoders 4678 ALU : S3(2); // both alus 4679 %} 4680 4681 // Integer ALU reg operation using big decoder 4682 pipe_class ialu_reg_fat(rRegI dst) %{ 4683 single_instruction; 4684 dst : S4(write); 4685 dst : S3(read); 4686 D0 : S0; // big decoder only 4687 ALU : S3; // any alu 4688 %} 4689 4690 // Long ALU reg operation using big decoder 4691 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4692 instruction_count(2); 4693 dst : S4(write); 4694 dst : S3(read); 4695 D0 : S0(2); // big decoder only; twice 4696 ALU : S3(2); // any 2 alus 4697 %} 4698 4699 // Integer ALU reg-reg operation 4700 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4701 single_instruction; 4702 dst : S4(write); 4703 src : S3(read); 4704 DECODE : S0; // any decoder 4705 ALU : S3; // any alu 4706 %} 4707 4708 // Long ALU reg-reg operation 4709 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4710 instruction_count(2); 4711 dst : S4(write); 4712 src : S3(read); 4713 DECODE : S0(2); // any 2 decoders 4714 ALU : S3(2); // both alus 4715 %} 4716 4717 // Integer ALU reg-reg operation 4718 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4719 single_instruction; 4720 dst : S4(write); 4721 src : S3(read); 4722 D0 : S0; // big decoder only 4723 ALU : S3; // any alu 4724 %} 4725 4726 // Long ALU reg-reg operation 4727 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4728 instruction_count(2); 4729 dst : S4(write); 4730 src : S3(read); 4731 D0 : S0(2); // big decoder only; twice 4732 ALU : S3(2); // both alus 4733 %} 4734 4735 // Integer ALU reg-mem operation 4736 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4737 single_instruction; 4738 dst : S5(write); 4739 mem : S3(read); 4740 D0 : S0; // big decoder only 4741 ALU : S4; // any alu 4742 MEM : S3; // any mem 4743 %} 4744 4745 // Long ALU reg-mem operation 4746 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4747 instruction_count(2); 4748 dst : S5(write); 4749 mem : S3(read); 4750 D0 : S0(2); // big decoder only; twice 4751 ALU : S4(2); // any 2 alus 4752 MEM : S3(2); // both mems 4753 %} 4754 4755 // Integer mem operation (prefetch) 4756 pipe_class ialu_mem(memory mem) 4757 %{ 4758 single_instruction; 4759 mem : S3(read); 4760 D0 : S0; // big decoder only 4761 MEM : S3; // any mem 4762 %} 4763 4764 // Integer Store to Memory 4765 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4766 single_instruction; 4767 mem : S3(read); 4768 src : S5(read); 4769 D0 : S0; // big decoder only 4770 ALU : S4; // any alu 4771 MEM : S3; 4772 %} 4773 4774 // Long Store to Memory 4775 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4776 instruction_count(2); 4777 mem : S3(read); 4778 src : S5(read); 4779 D0 : S0(2); // big decoder only; twice 4780 ALU : S4(2); // any 2 alus 4781 MEM : S3(2); // Both mems 4782 %} 4783 4784 // Integer Store to Memory 4785 pipe_class ialu_mem_imm(memory mem) %{ 4786 single_instruction; 4787 mem : S3(read); 4788 D0 : S0; // big decoder only 4789 ALU : S4; // any alu 4790 MEM : S3; 4791 %} 4792 4793 // Integer ALU0 reg-reg operation 4794 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4795 single_instruction; 4796 dst : S4(write); 4797 src : S3(read); 4798 D0 : S0; // Big decoder only 4799 ALU0 : S3; // only alu0 4800 %} 4801 4802 // Integer ALU0 reg-mem operation 4803 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4804 single_instruction; 4805 dst : S5(write); 4806 mem : S3(read); 4807 D0 : S0; // big decoder only 4808 ALU0 : S4; // ALU0 only 4809 MEM : S3; // any mem 4810 %} 4811 4812 // Integer ALU reg-reg operation 4813 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{
  single_instruction;
  cr : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  DECODE : S0; // any decoder
  ALU : S3; // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
  single_instruction;
  cr : S4(write);
  src1 : S3(read);
  DECODE : S0; // any decoder
  ALU : S3; // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
  single_instruction;
  cr : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  D0 : S0; // big decoder only
  ALU : S4; // any alu
  MEM : S3;
%}

// Compare-style idiom expanded to ~4 instructions (e.g. CmpLTMask-type
// sequences); all three operands are read-only inputs.
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
  instruction_count(4);
  y : S4(read);
  q : S3(read);
  p : S3(read);
  DECODE : S0(4); // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  cr : S3(read);
  DECODE : S0; // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  cr : S3(read);
  DECODE : S0; // any decoder
  MEM : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  cr : S3(read);
  DECODE : S0(2); // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  cr : S3(read);
  DECODE : S0; // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR dst) %{
  instruction_count(2);
  dst : S3(read);
  DECODE : S0(2); // any 2 decoders
  FPU : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
  instruction_count(2);
  dst : S4(write);
  src : S3(read);
  DECODE : S0(2); // any 2 decoders
  FPU : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
  instruction_count(3);
  dst : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  DECODE : S0(3); // any 3 decoders
  FPU : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  src3 : S3(read);
  DECODE : S0(4); // any 4 decoders
  FPU : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  src3 : S3(read);
  DECODE : S1(3); // any 3 decoders
  D0 : S0; // Big decoder only
  FPU : S3(2);
  MEM : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
  instruction_count(2);
  dst : S5(write);
  mem : S3(read);
  D0 : S0; // big decoder only
  DECODE : S1; // any decoder for FPU POP
  FPU : S4;
  MEM : S3; // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
  instruction_count(3);
  dst : S5(write);
  src1 : S3(read);
  mem : S3(read);
  D0 : S0; // big decoder only
  DECODE : S1(2); // any decoder for FPU POP
  FPU : S4;
  MEM : S3; // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
  instruction_count(2);
  src : S5(read);
  mem : S3(read);
  DECODE : S0; // any decoder for FPU PUSH
  D0 : S1; // big decoder only
  FPU : S4;
  MEM : S3; // any mem
%}

pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
  instruction_count(3);
  src1 : S3(read);
  src2 : S3(read);
  mem : S3(read);
  DECODE : S0(2); // any decoder for FPU PUSH
  D0 : S1; // big decoder only
  FPU : S4;
  MEM : S3; // any mem
%}

pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
  instruction_count(3);
  src1 : S3(read);
  src2 : S3(read);
  mem : S4(read);
  DECODE : S0; // any decoder for FPU PUSH
  D0 : S0(2); // big decoder only
  FPU : S4;
  MEM : S3(2); // any mem
%}

pipe_class fpu_mem_mem(memory dst, memory src1) %{
  instruction_count(2);
  src1 : S3(read);
  dst : S4(read);
  D0 : S0(2); // big decoder only
  MEM : S3(2); // any mem
%}

pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
  instruction_count(3);
  src1 : S3(read);
  src2 : S3(read);
  dst : S4(read);
  D0 : S0(3); // big decoder only
  FPU : S4;
  MEM : S3(3); // any mem
%}

pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
  instruction_count(3);
  src1 : S4(read);
  mem : S4(read);
  DECODE : S0; // any decoder for FPU PUSH
  D0 : S0(2); // big decoder only
  FPU : S4;
  MEM : S3(2); // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
  instruction_count(2);
  dst : S5(write);
  D0 : S0; // big decoder only for the load
  DECODE : S1; // any decoder for FPU POP
  FPU : S4;
  MEM : S3; // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
  instruction_count(3);
  dst : S5(write);
  src : S3(read);
  D0 : S0; // big decoder only for the load
  DECODE : S1(2); // any decoder for FPU POP
  FPU : S4;
  MEM : S3; // any mem
%}

// Unconditional branch
pipe_class pipe_jmp( label labl ) %{
  single_instruction;
  BR : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
  single_instruction;
  cr : S1(read);
  BR : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
  instruction_count(1); force_serialization;
  fixed_latency(6);
  heap_ptr : S3(read);
  DECODE : S0(3);
  D0 : S2;
  MEM : S3;
  ALU : S3(2);
  dst : S5(write);
  BR : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
  instruction_count(10); multiple_bundles; force_serialization;
  fixed_latency(100);
  D0 : S0(2);
  MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
  instruction_count(0);
%}

// Define the class for the Nop node
define %{
  MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match       -- States which machine-independent subtree may be replaced
//                by this instruction.
// ins_cost    -- The estimated cost of this instruction is used by instruction
//                selection to identify a minimum cost tree of machine
//                instructions that matches a tree of machine-independent
//                instructions.
// format      -- A string providing the disassembly for this instruction.
//                The value of an instruction's operand may be inserted
//                by referring to it with a '$' prefix.
// opcode      -- Three instruction opcodes may be provided. These are referred
//                to within an encode class as $primary, $secondary, and $tertiary
//                respectively. The primary opcode is commonly used to
//                indicate the type of machine instruction, while secondary
//                and tertiary are often used for prefix options or addressing
//                modes.
// ins_encode  -- A list of encode classes with parameters. The encode class
//                name must have been defined in an 'enc_class' specification
//                in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse a 64-bit value held in a register pair: byte-swap each half,
// then exchange the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    // Logical shift: the reversed 16-bit value ends up zero-extended.
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    // Arithmetic shift: the reversed 16-bit value ends up sign-extended.
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when LZCNT is unavailable: BSR gives the index of the highest
// set bit (undefined dst on zero input, hence the -1 fixup), then
// 31 - index is computed via NEG/ADD.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1); // input was zero: pretend bit index -1 so result is 32
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    // LZCNT sets CF iff its source is zero, so CF clear means the high
    // word supplied the final answer.
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when LZCNT is unavailable; same BSR + fixup trick as the int
// version, applied to whichever half is non-zero.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1); // whole long was zero: pretend bit index -1 so result is 64
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when TZCNT is unavailable: BSF finds the lowest set bit
// (undefined dst on zero input, hence the explicit 32 on the zero path).
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    // TZCNT sets CF iff its source is zero, so CF clear means the low
    // word supplied the final answer.
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when TZCNT is unavailable; scans the low word, then the high
// word, adding 32 for any answer coming from the high word (64 if both
// halves are zero).
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    // Sum of the two 32-bit popcounts gives the 64-bit popcount.
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Count the low and high words directly from memory (disp and disp+4).
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); // high word is zero for an unsigned value
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 mask bits matter after a zero-extended byte load.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    // shl 24 / sar 24 of a short load collapses to a sign-extended byte load.
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    // Mask 0xFF keeps only the low byte, so a zero-extended byte load suffices.
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 mask bits matter after a zero-extended short load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // replicate the sign bit into the high word
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    // A 31-bit mask clears the sign bit, so the high word is always zero.
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads (disp and disp+4); NOT atomic, guarded by the predicate.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load via a single 64-bit XMM move, then spilled
// to the destination stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load straight into a GPR pair: one 64-bit XMM
// load, then extract low and high words.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32); // shift high word down for the second extract
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Variant when the upper half of the XMM register should be preserved
// rather than cleared (see UseXmmLoadAndClearUpper).
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
6063 instruct loadConF(regF dst, immF con) %{ 6064 match(Set dst con); 6065 ins_cost(125); 6066 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 6067 ins_encode %{ 6068 __ movflt($dst$$XMMRegister, $constantaddress($con)); 6069 %} 6070 ins_pipe(pipe_slow); 6071 %} 6072 6073 // The instruction usage is guarded by predicate in operand immF0(). 6074 instruct loadConF0(regF dst, immF0 src) %{ 6075 match(Set dst src); 6076 ins_cost(100); 6077 format %{ "XORPS $dst,$dst\t# float 0.0" %} 6078 ins_encode %{ 6079 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 6080 %} 6081 ins_pipe(pipe_slow); 6082 %} 6083 6084 // The instruction usage is guarded by predicate in operand immDPR(). 6085 instruct loadConDPR(regDPR dst, immDPR con) %{ 6086 match(Set dst con); 6087 ins_cost(125); 6088 6089 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 6090 "FSTP $dst" %} 6091 ins_encode %{ 6092 __ fld_d($constantaddress($con)); 6093 __ fstp_d($dst$$reg); 6094 %} 6095 ins_pipe(fpu_reg_con); 6096 %} 6097 6098 // The instruction usage is guarded by predicate in operand immDPR0(). 6099 instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 6100 match(Set dst con); 6101 ins_cost(125); 6102 6103 format %{ "FLDZ ST\n\t" 6104 "FSTP $dst" %} 6105 ins_encode %{ 6106 __ fldz(); 6107 __ fstp_d($dst$$reg); 6108 %} 6109 ins_pipe(fpu_reg_con); 6110 %} 6111 6112 // The instruction usage is guarded by predicate in operand immDPR1(). 6113 instruct loadConDPR1(regDPR dst, immDPR1 con) %{ 6114 match(Set dst con); 6115 ins_cost(125); 6116 6117 format %{ "FLD1 ST\n\t" 6118 "FSTP $dst" %} 6119 ins_encode %{ 6120 __ fld1(); 6121 __ fstp_d($dst$$reg); 6122 %} 6123 ins_pipe(fpu_reg_con); 6124 %} 6125 6126 // The instruction usage is guarded by predicate in operand immD(). 
6127 instruct loadConD(regD dst, immD con) %{ 6128 match(Set dst con); 6129 ins_cost(125); 6130 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 6131 ins_encode %{ 6132 __ movdbl($dst$$XMMRegister, $constantaddress($con)); 6133 %} 6134 ins_pipe(pipe_slow); 6135 %} 6136 6137 // The instruction usage is guarded by predicate in operand immD0(). 6138 instruct loadConD0(regD dst, immD0 src) %{ 6139 match(Set dst src); 6140 ins_cost(100); 6141 format %{ "XORPD $dst,$dst\t# double 0.0" %} 6142 ins_encode %{ 6143 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); 6144 %} 6145 ins_pipe( pipe_slow ); 6146 %} 6147 6148 // Load Stack Slot 6149 instruct loadSSI(rRegI dst, stackSlotI src) %{ 6150 match(Set dst src); 6151 ins_cost(125); 6152 6153 format %{ "MOV $dst,$src" %} 6154 opcode(0x8B); 6155 ins_encode( OpcP, RegMem(dst,src)); 6156 ins_pipe( ialu_reg_mem ); 6157 %} 6158 6159 instruct loadSSL(eRegL dst, stackSlotL src) %{ 6160 match(Set dst src); 6161 6162 ins_cost(200); 6163 format %{ "MOV $dst,$src.lo\n\t" 6164 "MOV $dst+4,$src.hi" %} 6165 opcode(0x8B, 0x8B); 6166 ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) ); 6167 ins_pipe( ialu_mem_long_reg ); 6168 %} 6169 6170 // Load Stack Slot 6171 instruct loadSSP(eRegP dst, stackSlotP src) %{ 6172 match(Set dst src); 6173 ins_cost(125); 6174 6175 format %{ "MOV $dst,$src" %} 6176 opcode(0x8B); 6177 ins_encode( OpcP, RegMem(dst,src)); 6178 ins_pipe( ialu_reg_mem ); 6179 %} 6180 6181 // Load Stack Slot 6182 instruct loadSSF(regFPR dst, stackSlotF src) %{ 6183 match(Set dst src); 6184 ins_cost(125); 6185 6186 format %{ "FLD_S $src\n\t" 6187 "FSTP $dst" %} 6188 opcode(0xD9); /* D9 /0, FLD m32real */ 6189 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6190 Pop_Reg_FPR(dst) ); 6191 ins_pipe( fpu_reg_mem ); 6192 %} 6193 6194 // Load Stack Slot 6195 instruct loadSSD(regDPR dst, stackSlotD src) %{ 6196 match(Set dst src); 6197 ins_cost(125); 6198 6199 format %{ "FLD_D $src\n\t" 6200 
"FSTP $dst" %} 6201 opcode(0xDD); /* DD /0, FLD m64real */ 6202 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6203 Pop_Reg_DPR(dst) ); 6204 ins_pipe( fpu_reg_mem ); 6205 %} 6206 6207 // Prefetch instructions for allocation. 6208 // Must be safe to execute with invalid address (cannot fault). 6209 6210 instruct prefetchAlloc0( memory mem ) %{ 6211 predicate(UseSSE==0 && AllocatePrefetchInstr!=3); 6212 match(PrefetchAllocation mem); 6213 ins_cost(0); 6214 size(0); 6215 format %{ "Prefetch allocation (non-SSE is empty encoding)" %} 6216 ins_encode(); 6217 ins_pipe(empty); 6218 %} 6219 6220 instruct prefetchAlloc( memory mem ) %{ 6221 predicate(AllocatePrefetchInstr==3); 6222 match( PrefetchAllocation mem ); 6223 ins_cost(100); 6224 6225 format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %} 6226 ins_encode %{ 6227 __ prefetchw($mem$$Address); 6228 %} 6229 ins_pipe(ialu_mem); 6230 %} 6231 6232 instruct prefetchAllocNTA( memory mem ) %{ 6233 predicate(UseSSE>=1 && AllocatePrefetchInstr==0); 6234 match(PrefetchAllocation mem); 6235 ins_cost(100); 6236 6237 format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %} 6238 ins_encode %{ 6239 __ prefetchnta($mem$$Address); 6240 %} 6241 ins_pipe(ialu_mem); 6242 %} 6243 6244 instruct prefetchAllocT0( memory mem ) %{ 6245 predicate(UseSSE>=1 && AllocatePrefetchInstr==1); 6246 match(PrefetchAllocation mem); 6247 ins_cost(100); 6248 6249 format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %} 6250 ins_encode %{ 6251 __ prefetcht0($mem$$Address); 6252 %} 6253 ins_pipe(ialu_mem); 6254 %} 6255 6256 instruct prefetchAllocT2( memory mem ) %{ 6257 predicate(UseSSE>=1 && AllocatePrefetchInstr==2); 6258 match(PrefetchAllocation mem); 6259 ins_cost(100); 6260 6261 format %{ "PREFETCHT2 $mem\t! 
Prefetch allocation into L2 cache for write" %} 6262 ins_encode %{ 6263 __ prefetcht2($mem$$Address); 6264 %} 6265 ins_pipe(ialu_mem); 6266 %} 6267 6268 //----------Store Instructions------------------------------------------------- 6269 6270 // Store Byte 6271 instruct storeB(memory mem, xRegI src) %{ 6272 match(Set mem (StoreB mem src)); 6273 6274 ins_cost(125); 6275 format %{ "MOV8 $mem,$src" %} 6276 opcode(0x88); 6277 ins_encode( OpcP, RegMem( src, mem ) ); 6278 ins_pipe( ialu_mem_reg ); 6279 %} 6280 6281 // Store Char/Short 6282 instruct storeC(memory mem, rRegI src) %{ 6283 match(Set mem (StoreC mem src)); 6284 6285 ins_cost(125); 6286 format %{ "MOV16 $mem,$src" %} 6287 opcode(0x89, 0x66); 6288 ins_encode( OpcS, OpcP, RegMem( src, mem ) ); 6289 ins_pipe( ialu_mem_reg ); 6290 %} 6291 6292 // Store Integer 6293 instruct storeI(memory mem, rRegI src) %{ 6294 match(Set mem (StoreI mem src)); 6295 6296 ins_cost(125); 6297 format %{ "MOV $mem,$src" %} 6298 opcode(0x89); 6299 ins_encode( OpcP, RegMem( src, mem ) ); 6300 ins_pipe( ialu_mem_reg ); 6301 %} 6302 6303 // Store Long 6304 instruct storeL(long_memory mem, eRegL src) %{ 6305 predicate(!((StoreLNode*)n)->require_atomic_access()); 6306 match(Set mem (StoreL mem src)); 6307 6308 ins_cost(200); 6309 format %{ "MOV $mem,$src.lo\n\t" 6310 "MOV $mem+4,$src.hi" %} 6311 opcode(0x89, 0x89); 6312 ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) ); 6313 ins_pipe( ialu_mem_long_reg ); 6314 %} 6315 6316 // Store Long to Integer 6317 instruct storeL2I(memory mem, eRegL src) %{ 6318 match(Set mem (StoreI mem (ConvL2I src))); 6319 6320 format %{ "MOV $mem,$src.lo\t# long -> int" %} 6321 ins_encode %{ 6322 __ movl($mem$$Address, $src$$Register); 6323 %} 6324 ins_pipe(ialu_mem_reg); 6325 %} 6326 6327 // Volatile Store Long. Must be atomic, so move it into 6328 // the FP TOS and then do a 64-bit FIST. 
Has to probe the 6329 // target address before the store (for null-ptr checks) 6330 // so the memory operand is used twice in the encoding. 6331 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{ 6332 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access()); 6333 match(Set mem (StoreL mem src)); 6334 effect( KILL cr ); 6335 ins_cost(400); 6336 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6337 "FILD $src\n\t" 6338 "FISTp $mem\t # 64-bit atomic volatile long store" %} 6339 opcode(0x3B); 6340 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); 6341 ins_pipe( fpu_reg_mem ); 6342 %} 6343 6344 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ 6345 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6346 match(Set mem (StoreL mem src)); 6347 effect( TEMP tmp, KILL cr ); 6348 ins_cost(380); 6349 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6350 "MOVSD $tmp,$src\n\t" 6351 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6352 ins_encode %{ 6353 __ cmpl(rax, $mem$$Address); 6354 __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp)); 6355 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6356 %} 6357 ins_pipe( pipe_slow ); 6358 %} 6359 6360 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ 6361 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6362 match(Set mem (StoreL mem src)); 6363 effect( TEMP tmp2 , TEMP tmp, KILL cr ); 6364 ins_cost(360); 6365 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6366 "MOVD $tmp,$src.lo\n\t" 6367 "MOVD $tmp2,$src.hi\n\t" 6368 "PUNPCKLDQ $tmp,$tmp2\n\t" 6369 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6370 ins_encode %{ 6371 __ cmpl(rax, $mem$$Address); 6372 __ movdl($tmp$$XMMRegister, $src$$Register); 6373 __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 6374 __ punpckldq($tmp$$XMMRegister, 
$tmp2$$XMMRegister); 6375 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6376 %} 6377 ins_pipe( pipe_slow ); 6378 %} 6379 6380 // Store Pointer; for storing unknown oops and raw pointers 6381 instruct storeP(memory mem, anyRegP src) %{ 6382 match(Set mem (StoreP mem src)); 6383 6384 ins_cost(125); 6385 format %{ "MOV $mem,$src" %} 6386 opcode(0x89); 6387 ins_encode( OpcP, RegMem( src, mem ) ); 6388 ins_pipe( ialu_mem_reg ); 6389 %} 6390 6391 // Store Integer Immediate 6392 instruct storeImmI(memory mem, immI src) %{ 6393 match(Set mem (StoreI mem src)); 6394 6395 ins_cost(150); 6396 format %{ "MOV $mem,$src" %} 6397 opcode(0xC7); /* C7 /0 */ 6398 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6399 ins_pipe( ialu_mem_imm ); 6400 %} 6401 6402 // Store Short/Char Immediate 6403 instruct storeImmI16(memory mem, immI16 src) %{ 6404 predicate(UseStoreImmI16); 6405 match(Set mem (StoreC mem src)); 6406 6407 ins_cost(150); 6408 format %{ "MOV16 $mem,$src" %} 6409 opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */ 6410 ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src )); 6411 ins_pipe( ialu_mem_imm ); 6412 %} 6413 6414 // Store Pointer Immediate; null pointers or constant oops that do not 6415 // need card-mark barriers. 
6416 instruct storeImmP(memory mem, immP src) %{ 6417 match(Set mem (StoreP mem src)); 6418 6419 ins_cost(150); 6420 format %{ "MOV $mem,$src" %} 6421 opcode(0xC7); /* C7 /0 */ 6422 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6423 ins_pipe( ialu_mem_imm ); 6424 %} 6425 6426 // Store Byte Immediate 6427 instruct storeImmB(memory mem, immI8 src) %{ 6428 match(Set mem (StoreB mem src)); 6429 6430 ins_cost(150); 6431 format %{ "MOV8 $mem,$src" %} 6432 opcode(0xC6); /* C6 /0 */ 6433 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 6434 ins_pipe( ialu_mem_imm ); 6435 %} 6436 6437 // Store CMS card-mark Immediate 6438 instruct storeImmCM(memory mem, immI8 src) %{ 6439 match(Set mem (StoreCM mem src)); 6440 6441 ins_cost(150); 6442 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %} 6443 opcode(0xC6); /* C6 /0 */ 6444 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 6445 ins_pipe( ialu_mem_imm ); 6446 %} 6447 6448 // Store Double 6449 instruct storeDPR( memory mem, regDPR1 src) %{ 6450 predicate(UseSSE<=1); 6451 match(Set mem (StoreD mem src)); 6452 6453 ins_cost(100); 6454 format %{ "FST_D $mem,$src" %} 6455 opcode(0xDD); /* DD /2 */ 6456 ins_encode( enc_FPR_store(mem,src) ); 6457 ins_pipe( fpu_mem_reg ); 6458 %} 6459 6460 // Store double does rounding on x86 6461 instruct storeDPR_rounded( memory mem, regDPR1 src) %{ 6462 predicate(UseSSE<=1); 6463 match(Set mem (StoreD mem (RoundDouble src))); 6464 6465 ins_cost(100); 6466 format %{ "FST_D $mem,$src\t# round" %} 6467 opcode(0xDD); /* DD /2 */ 6468 ins_encode( enc_FPR_store(mem,src) ); 6469 ins_pipe( fpu_mem_reg ); 6470 %} 6471 6472 // Store XMM register to memory (double-precision floating points) 6473 // MOVSD instruction 6474 instruct storeD(memory mem, regD src) %{ 6475 predicate(UseSSE>=2); 6476 match(Set mem (StoreD mem src)); 6477 ins_cost(95); 6478 format %{ "MOVSD $mem,$src" %} 6479 ins_encode %{ 6480 __ movdbl($mem$$Address, $src$$XMMRegister); 6481 %} 6482 ins_pipe( pipe_slow ); 6483 
%} 6484 6485 // Load Double 6486 instruct MoveD2VL(vlRegD dst, regD src) %{ 6487 match(Set dst src); 6488 format %{ "movsd $dst,$src\t! load double (8 bytes)" %} 6489 ins_encode %{ 6490 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6491 %} 6492 ins_pipe( fpu_reg_reg ); 6493 %} 6494 6495 // Load Double 6496 instruct MoveVL2D(regD dst, vlRegD src) %{ 6497 match(Set dst src); 6498 format %{ "movsd $dst,$src\t! load double (8 bytes)" %} 6499 ins_encode %{ 6500 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6501 %} 6502 ins_pipe( fpu_reg_reg ); 6503 %} 6504 6505 // Store XMM register to memory (single-precision floating point) 6506 // MOVSS instruction 6507 instruct storeF(memory mem, regF src) %{ 6508 predicate(UseSSE>=1); 6509 match(Set mem (StoreF mem src)); 6510 ins_cost(95); 6511 format %{ "MOVSS $mem,$src" %} 6512 ins_encode %{ 6513 __ movflt($mem$$Address, $src$$XMMRegister); 6514 %} 6515 ins_pipe( pipe_slow ); 6516 %} 6517 6518 // Load Float 6519 instruct MoveF2VL(vlRegF dst, regF src) %{ 6520 match(Set dst src); 6521 format %{ "movss $dst,$src\t! load float (4 bytes)" %} 6522 ins_encode %{ 6523 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6524 %} 6525 ins_pipe( fpu_reg_reg ); 6526 %} 6527 6528 // Load Float 6529 instruct MoveVL2F(regF dst, vlRegF src) %{ 6530 match(Set dst src); 6531 format %{ "movss $dst,$src\t! 
load float (4 bytes)" %} 6532 ins_encode %{ 6533 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6534 %} 6535 ins_pipe( fpu_reg_reg ); 6536 %} 6537 6538 // Store Float 6539 instruct storeFPR( memory mem, regFPR1 src) %{ 6540 predicate(UseSSE==0); 6541 match(Set mem (StoreF mem src)); 6542 6543 ins_cost(100); 6544 format %{ "FST_S $mem,$src" %} 6545 opcode(0xD9); /* D9 /2 */ 6546 ins_encode( enc_FPR_store(mem,src) ); 6547 ins_pipe( fpu_mem_reg ); 6548 %} 6549 6550 // Store Float does rounding on x86 6551 instruct storeFPR_rounded( memory mem, regFPR1 src) %{ 6552 predicate(UseSSE==0); 6553 match(Set mem (StoreF mem (RoundFloat src))); 6554 6555 ins_cost(100); 6556 format %{ "FST_S $mem,$src\t# round" %} 6557 opcode(0xD9); /* D9 /2 */ 6558 ins_encode( enc_FPR_store(mem,src) ); 6559 ins_pipe( fpu_mem_reg ); 6560 %} 6561 6562 // Store Float does rounding on x86 6563 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{ 6564 predicate(UseSSE<=1); 6565 match(Set mem (StoreF mem (ConvD2F src))); 6566 6567 ins_cost(100); 6568 format %{ "FST_S $mem,$src\t# D-round" %} 6569 opcode(0xD9); /* D9 /2 */ 6570 ins_encode( enc_FPR_store(mem,src) ); 6571 ins_pipe( fpu_mem_reg ); 6572 %} 6573 6574 // Store immediate Float value (it is faster than store from FPU register) 6575 // The instruction usage is guarded by predicate in operand immFPR(). 6576 instruct storeFPR_imm( memory mem, immFPR src) %{ 6577 match(Set mem (StoreF mem src)); 6578 6579 ins_cost(50); 6580 format %{ "MOV $mem,$src\t# store float" %} 6581 opcode(0xC7); /* C7 /0 */ 6582 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src )); 6583 ins_pipe( ialu_mem_imm ); 6584 %} 6585 6586 // Store immediate Float value (it is faster than store from XMM register) 6587 // The instruction usage is guarded by predicate in operand immF(). 
6588 instruct storeF_imm( memory mem, immF src) %{ 6589 match(Set mem (StoreF mem src)); 6590 6591 ins_cost(50); 6592 format %{ "MOV $mem,$src\t# store float" %} 6593 opcode(0xC7); /* C7 /0 */ 6594 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src )); 6595 ins_pipe( ialu_mem_imm ); 6596 %} 6597 6598 // Store Integer to stack slot 6599 instruct storeSSI(stackSlotI dst, rRegI src) %{ 6600 match(Set dst src); 6601 6602 ins_cost(100); 6603 format %{ "MOV $dst,$src" %} 6604 opcode(0x89); 6605 ins_encode( OpcPRegSS( dst, src ) ); 6606 ins_pipe( ialu_mem_reg ); 6607 %} 6608 6609 // Store Integer to stack slot 6610 instruct storeSSP(stackSlotP dst, eRegP src) %{ 6611 match(Set dst src); 6612 6613 ins_cost(100); 6614 format %{ "MOV $dst,$src" %} 6615 opcode(0x89); 6616 ins_encode( OpcPRegSS( dst, src ) ); 6617 ins_pipe( ialu_mem_reg ); 6618 %} 6619 6620 // Store Long to stack slot 6621 instruct storeSSL(stackSlotL dst, eRegL src) %{ 6622 match(Set dst src); 6623 6624 ins_cost(200); 6625 format %{ "MOV $dst,$src.lo\n\t" 6626 "MOV $dst+4,$src.hi" %} 6627 opcode(0x89, 0x89); 6628 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 6629 ins_pipe( ialu_mem_long_reg ); 6630 %} 6631 6632 //----------MemBar Instructions----------------------------------------------- 6633 // Memory barrier flavors 6634 6635 instruct membar_acquire() %{ 6636 match(MemBarAcquire); 6637 match(LoadFence); 6638 ins_cost(400); 6639 6640 size(0); 6641 format %{ "MEMBAR-acquire ! (empty encoding)" %} 6642 ins_encode(); 6643 ins_pipe(empty); 6644 %} 6645 6646 instruct membar_acquire_lock() %{ 6647 match(MemBarAcquireLock); 6648 ins_cost(0); 6649 6650 size(0); 6651 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %} 6652 ins_encode( ); 6653 ins_pipe(empty); 6654 %} 6655 6656 instruct membar_release() %{ 6657 match(MemBarRelease); 6658 match(StoreFence); 6659 ins_cost(400); 6660 6661 size(0); 6662 format %{ "MEMBAR-release ! 
(empty encoding)" %} 6663 ins_encode( ); 6664 ins_pipe(empty); 6665 %} 6666 6667 instruct membar_release_lock() %{ 6668 match(MemBarReleaseLock); 6669 ins_cost(0); 6670 6671 size(0); 6672 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %} 6673 ins_encode( ); 6674 ins_pipe(empty); 6675 %} 6676 6677 instruct membar_volatile(eFlagsReg cr) %{ 6678 match(MemBarVolatile); 6679 effect(KILL cr); 6680 ins_cost(400); 6681 6682 format %{ 6683 $$template 6684 $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile" 6685 %} 6686 ins_encode %{ 6687 __ membar(Assembler::StoreLoad); 6688 %} 6689 ins_pipe(pipe_slow); 6690 %} 6691 6692 instruct unnecessary_membar_volatile() %{ 6693 match(MemBarVolatile); 6694 predicate(Matcher::post_store_load_barrier(n)); 6695 ins_cost(0); 6696 6697 size(0); 6698 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} 6699 ins_encode( ); 6700 ins_pipe(empty); 6701 %} 6702 6703 instruct membar_storestore() %{ 6704 match(MemBarStoreStore); 6705 ins_cost(0); 6706 6707 size(0); 6708 format %{ "MEMBAR-storestore (empty encoding)" %} 6709 ins_encode( ); 6710 ins_pipe(empty); 6711 %} 6712 6713 //----------Move Instructions-------------------------------------------------- 6714 instruct castX2P(eAXRegP dst, eAXRegI src) %{ 6715 match(Set dst (CastX2P src)); 6716 format %{ "# X2P $dst, $src" %} 6717 ins_encode( /*empty encoding*/ ); 6718 ins_cost(0); 6719 ins_pipe(empty); 6720 %} 6721 6722 instruct castP2X(rRegI dst, eRegP src ) %{ 6723 match(Set dst (CastP2X src)); 6724 ins_cost(50); 6725 format %{ "MOV $dst, $src\t# CastP2X" %} 6726 ins_encode( enc_Copy( dst, src) ); 6727 ins_pipe( ialu_reg_reg ); 6728 %} 6729 6730 //----------Conditional Move--------------------------------------------------- 6731 // Conditional move 6732 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ 6733 predicate(!VM_Version::supports_cmov() ); 6734 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6735 ins_cost(200); 6736 format %{ 
"J$cop,us skip\t# signed cmove\n\t" 6737 "MOV $dst,$src\n" 6738 "skip:" %} 6739 ins_encode %{ 6740 Label Lskip; 6741 // Invert sense of branch from sense of CMOV 6742 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6743 __ movl($dst$$Register, $src$$Register); 6744 __ bind(Lskip); 6745 %} 6746 ins_pipe( pipe_cmov_reg ); 6747 %} 6748 6749 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ 6750 predicate(!VM_Version::supports_cmov() ); 6751 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6752 ins_cost(200); 6753 format %{ "J$cop,us skip\t# unsigned cmove\n\t" 6754 "MOV $dst,$src\n" 6755 "skip:" %} 6756 ins_encode %{ 6757 Label Lskip; 6758 // Invert sense of branch from sense of CMOV 6759 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6760 __ movl($dst$$Register, $src$$Register); 6761 __ bind(Lskip); 6762 %} 6763 ins_pipe( pipe_cmov_reg ); 6764 %} 6765 6766 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ 6767 predicate(VM_Version::supports_cmov() ); 6768 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6769 ins_cost(200); 6770 format %{ "CMOV$cop $dst,$src" %} 6771 opcode(0x0F,0x40); 6772 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6773 ins_pipe( pipe_cmov_reg ); 6774 %} 6775 6776 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ 6777 predicate(VM_Version::supports_cmov() ); 6778 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6779 ins_cost(200); 6780 format %{ "CMOV$cop $dst,$src" %} 6781 opcode(0x0F,0x40); 6782 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6783 ins_pipe( pipe_cmov_reg ); 6784 %} 6785 6786 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ 6787 predicate(VM_Version::supports_cmov() ); 6788 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6789 ins_cost(200); 6790 expand %{ 6791 cmovI_regU(cop, cr, dst, src); 6792 %} 6793 %} 6794 6795 // Conditional move 6796 instruct cmovI_mem(cmpOp cop, 
eFlagsReg cr, rRegI dst, memory src) %{ 6797 predicate(VM_Version::supports_cmov() ); 6798 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6799 ins_cost(250); 6800 format %{ "CMOV$cop $dst,$src" %} 6801 opcode(0x0F,0x40); 6802 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6803 ins_pipe( pipe_cmov_mem ); 6804 %} 6805 6806 // Conditional move 6807 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ 6808 predicate(VM_Version::supports_cmov() ); 6809 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6810 ins_cost(250); 6811 format %{ "CMOV$cop $dst,$src" %} 6812 opcode(0x0F,0x40); 6813 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6814 ins_pipe( pipe_cmov_mem ); 6815 %} 6816 6817 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ 6818 predicate(VM_Version::supports_cmov() ); 6819 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6820 ins_cost(250); 6821 expand %{ 6822 cmovI_memU(cop, cr, dst, src); 6823 %} 6824 %} 6825 6826 // Conditional move 6827 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6828 predicate(VM_Version::supports_cmov() ); 6829 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6830 ins_cost(200); 6831 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6832 opcode(0x0F,0x40); 6833 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6834 ins_pipe( pipe_cmov_reg ); 6835 %} 6836 6837 // Conditional move (non-P6 version) 6838 // Note: a CMoveP is generated for stubs and native wrappers 6839 // regardless of whether we are on a P6, so we 6840 // emulate a cmov here 6841 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6842 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6843 ins_cost(300); 6844 format %{ "Jn$cop skip\n\t" 6845 "MOV $dst,$src\t# pointer\n" 6846 "skip:" %} 6847 opcode(0x8b); 6848 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); 6849 ins_pipe( pipe_cmov_reg ); 6850 %} 6851 
6852 // Conditional move 6853 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ 6854 predicate(VM_Version::supports_cmov() ); 6855 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6856 ins_cost(200); 6857 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6858 opcode(0x0F,0x40); 6859 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6860 ins_pipe( pipe_cmov_reg ); 6861 %} 6862 6863 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ 6864 predicate(VM_Version::supports_cmov() ); 6865 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6866 ins_cost(200); 6867 expand %{ 6868 cmovP_regU(cop, cr, dst, src); 6869 %} 6870 %} 6871 6872 // DISABLED: Requires the ADLC to emit a bottom_type call that 6873 // correctly meets the two pointer arguments; one is an incoming 6874 // register but the other is a memory operand. ALSO appears to 6875 // be buggy with implicit null checks. 6876 // 6877 //// Conditional move 6878 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{ 6879 // predicate(VM_Version::supports_cmov() ); 6880 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6881 // ins_cost(250); 6882 // format %{ "CMOV$cop $dst,$src\t# ptr" %} 6883 // opcode(0x0F,0x40); 6884 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6885 // ins_pipe( pipe_cmov_mem ); 6886 //%} 6887 // 6888 //// Conditional move 6889 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{ 6890 // predicate(VM_Version::supports_cmov() ); 6891 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6892 // ins_cost(250); 6893 // format %{ "CMOV$cop $dst,$src\t# ptr" %} 6894 // opcode(0x0F,0x40); 6895 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6896 // ins_pipe( pipe_cmov_mem ); 6897 //%} 6898 6899 // Conditional move 6900 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ 6901 predicate(UseSSE<=1); 6902 match(Set dst (CMoveD (Binary cop cr) 
(Binary dst src))); 6903 ins_cost(200); 6904 format %{ "FCMOV$cop $dst,$src\t# double" %} 6905 opcode(0xDA); 6906 ins_encode( enc_cmov_dpr(cop,src) ); 6907 ins_pipe( pipe_cmovDPR_reg ); 6908 %} 6909 6910 // Conditional move 6911 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ 6912 predicate(UseSSE==0); 6913 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6914 ins_cost(200); 6915 format %{ "FCMOV$cop $dst,$src\t# float" %} 6916 opcode(0xDA); 6917 ins_encode( enc_cmov_dpr(cop,src) ); 6918 ins_pipe( pipe_cmovDPR_reg ); 6919 %} 6920 6921 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6922 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ 6923 predicate(UseSSE<=1); 6924 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6925 ins_cost(200); 6926 format %{ "Jn$cop skip\n\t" 6927 "MOV $dst,$src\t# double\n" 6928 "skip:" %} 6929 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6930 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); 6931 ins_pipe( pipe_cmovDPR_reg ); 6932 %} 6933 6934 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 
// Conditional move of a float, x87 form: branch around a stack-register move.
// Only used when SSE is disabled (UseSSE==0).
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
// (format annotation corrected: the operands are doubles, not floats)
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
// (format annotation corrected: the operands are doubles, not floats)
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Conditional move of a long: two CMOVs, one per 32-bit half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of +1 becomes a one-byte INC when UseIncDec is enabled.
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA; does not touch the flags.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add of -1 becomes a one-byte DEC when UseIncDec is enabled.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// Zero-size, zero-cost ideal-graph bookkeeping nodes: no code is emitted.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS: needs CMPXCHG8B support, expected value in EDX:EAX,
// replacement in ECX:EBX; $res is the success boolean.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange variants: like CAS, but the witnessed old value
// (left behind by CMPXCHG in the accumulator register) is the result.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddB whose result is discarded: a locked ADD suffices (no XADD needed).
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
7420 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{ 7421 match(Set newval (GetAndAddB mem newval)); 7422 effect(KILL cr); 7423 format %{ "XADDB [$mem],$newval" %} 7424 ins_encode %{ 7425 __ lock(); 7426 __ xaddb($mem$$Address, $newval$$Register); 7427 %} 7428 ins_pipe( pipe_cmpxchg ); 7429 %} 7430 7431 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7432 predicate(n->as_LoadStore()->result_not_used()); 7433 match(Set dummy (GetAndAddS mem add)); 7434 effect(KILL cr); 7435 format %{ "ADDS [$mem],$add" %} 7436 ins_encode %{ 7437 __ lock(); 7438 __ addw($mem$$Address, $add$$constant); 7439 %} 7440 ins_pipe( pipe_cmpxchg ); 7441 %} 7442 7443 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{ 7444 match(Set newval (GetAndAddS mem newval)); 7445 effect(KILL cr); 7446 format %{ "XADDS [$mem],$newval" %} 7447 ins_encode %{ 7448 __ lock(); 7449 __ xaddw($mem$$Address, $newval$$Register); 7450 %} 7451 ins_pipe( pipe_cmpxchg ); 7452 %} 7453 7454 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7455 predicate(n->as_LoadStore()->result_not_used()); 7456 match(Set dummy (GetAndAddI mem add)); 7457 effect(KILL cr); 7458 format %{ "ADDL [$mem],$add" %} 7459 ins_encode %{ 7460 __ lock(); 7461 __ addl($mem$$Address, $add$$constant); 7462 %} 7463 ins_pipe( pipe_cmpxchg ); 7464 %} 7465 7466 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7467 match(Set newval (GetAndAddI mem newval)); 7468 effect(KILL cr); 7469 format %{ "XADDL [$mem],$newval" %} 7470 ins_encode %{ 7471 __ lock(); 7472 __ xaddl($mem$$Address, $newval$$Register); 7473 %} 7474 ins_pipe( pipe_cmpxchg ); 7475 %} 7476 7477 // Important to match to xRegI: only 8-bit regs. 
// Atomic exchange instructions (XCHG has an implicit LOCK with a memory operand).
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
//  (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// dst += src1*1 as two multiplies and an add, built from existing instructs.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// Guards the min_jint / -1 case, which would otherwise raise #DE on IDIV.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
// Long division by a 32-bit constant, done inline with unsigned 32-bit DIV:
// divide by abs(imm) (high word first, then low word with the partial
// remainder carried in EDX), negate the result pieces for a negative
// dividend, and negate the whole result at the end if imm < 0.
// The fast path (JA fast) applies when abs(imm) > hi word, i.e. the
// quotient's high word is zero.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    // 0, -1 and min_jint divisors are excluded by the matcher's contract
    // (this instruct is "no special case since divisor != -1").
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    // Flags still hold pcon - hi; hi >= pcon > 0 means a positive dividend.
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit hi result.

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
// Same structure as divL_eReg_imm32 above, but keeps the remainder
// (in EDX after each DIV) instead of the quotient, negates it for a
// negative dividend, and finally sign-extends it into the long result.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    // Remainder is in EDX (the hi half of the dst pair); move it to the
    // lo half and sign-extend into the hi half.
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable (count must be in CL, hence eCXRegI)
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one, directly in memory (read-modify-write)
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  // NOTE(review): ialu_mem_imm for a register-only form — the sibling
  // register shifts use ialu_reg; possibly a copy/paste, confirm intent.
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate, directly in memory
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable (count in CL)
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Shift Left by 24, followed by Arithmetic Shift Right by 24 —
// sign-extends the low byte. This idiom is used by the compiler for the
// i2b bytecode, and is matched here as a single MOVSX.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by 16, followed by Arithmetic Shift Right by 16 —
// sign-extends the low 16 bits. This idiom is used by the compiler for
// the i2s bytecode.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable (count in CL)
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register (read-modify-write)
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate (read-modify-write)
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2, matched from the ideal (src1 ^ -1) & src2 shape.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from (0 - src) & src.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to (and including) lowest set bit, from (src + -1) ^ src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, from (src + -1) & src.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer reinterpreted as an int (CastP2X)
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register (read-modify-write)
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate (read-modify-write)
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand — helper instructs used only via 'expand' below, never matched.
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once — (x << 1) | (x >>> 31)
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once — predicate requires lshift + rshift == 32 (mod 32)
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once — (x << s) | (x >>> (0 - s))
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once — (x << s) | (x >>> (32 - s))
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand — helper instructs used only via 'expand' below, never matched.
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once — (x >>> 1) | (x << 31)
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once — predicate requires rshift + lshift == 32 (mod 32)
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once — (x >>> s) | (x << (0 - s))
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once — (x >>> s) | (x << (32 - s))
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1 — strength-reduced to NOT (no flags killed).
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register (read-modify-write)
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate (read-modify-write)
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------
// Conv2B is expanded as copy + (NEG; ADC dst,src): NEG sets carry iff the
// value was non-zero, and ADC folds that into a 0/1 result.

instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// CmpLTMask: dst = (p < q) ? -1 : 0, computed branch-free with SETcc + NEG.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Branch-free: zero dst, set its low byte to (p < q), negate to
    // spread the bit into a full 0/-1 mask. (An unused 'Label done;'
    // left over from an earlier branching version has been removed.)
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: the sign bit already is the mask, so just SAR by 31.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// p = (p - q) + ((p < q) ? y : 0), matched from the masked-add ideal shape.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// y = (p < q) ? y : 0 — zero y unless p < q.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These produce only the flags result; the overflow condition is consumed
// by a subsequent branch/cmove that tests the OF flag.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtraction overflow needs only the flags, so CMP suffices (no operand killed).
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 0 - x overflow check: NEG sets OF iff x == min_jint.
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Three-operand IMUL into a temp so neither source is destroyed.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer Absolute Instructions
// Branch-free abs: tmp = sign mask (x >> 31), dst = (x ^ tmp) - tmp.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
  %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

//----------Long Instructions------------------------------------------------
// Longs live in register pairs on 32-bit x86: each ALU op is a lo-word
// instruction followed by its carry-propagating hi-word counterpart.

// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long: matched from (0 - x).
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long ANDN: dst = ~src1 & src2, one ANDNL per register half.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2,
immL_M1 minus_1, eFlagsReg cr) %{ 9030 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 9031 predicate(UseBMI1Instructions); 9032 effect(KILL cr, TEMP dst); 9033 9034 ins_cost(125); 9035 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 9036 "ANDNL $dst.hi, $src1.hi, $src2+4" 9037 %} 9038 9039 ins_encode %{ 9040 Register Rdst = $dst$$Register; 9041 Register Rsrc1 = $src1$$Register; 9042 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 9043 9044 __ andnl(Rdst, Rsrc1, $src2$$Address); 9045 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); 9046 %} 9047 ins_pipe(ialu_reg_mem); 9048 %} 9049 9050 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 9051 match(Set dst (AndL (SubL imm_zero src) src)); 9052 predicate(UseBMI1Instructions); 9053 effect(KILL cr, TEMP dst); 9054 9055 format %{ "MOVL $dst.hi, 0\n\t" 9056 "BLSIL $dst.lo, $src.lo\n\t" 9057 "JNZ done\n\t" 9058 "BLSIL $dst.hi, $src.hi\n" 9059 "done:" 9060 %} 9061 9062 ins_encode %{ 9063 Label done; 9064 Register Rdst = $dst$$Register; 9065 Register Rsrc = $src$$Register; 9066 __ movl(HIGH_FROM_LOW(Rdst), 0); 9067 __ blsil(Rdst, Rsrc); 9068 __ jccb(Assembler::notZero, done); 9069 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9070 __ bind(done); 9071 %} 9072 ins_pipe(ialu_reg); 9073 %} 9074 9075 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 9076 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 9077 predicate(UseBMI1Instructions); 9078 effect(KILL cr, TEMP dst); 9079 9080 ins_cost(125); 9081 format %{ "MOVL $dst.hi, 0\n\t" 9082 "BLSIL $dst.lo, $src\n\t" 9083 "JNZ done\n\t" 9084 "BLSIL $dst.hi, $src+4\n" 9085 "done:" 9086 %} 9087 9088 ins_encode %{ 9089 Label done; 9090 Register Rdst = $dst$$Register; 9091 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9092 9093 __ movl(HIGH_FROM_LOW(Rdst), 0); 9094 
__ blsil(Rdst, $src$$Address); 9095 __ jccb(Assembler::notZero, done); 9096 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 9097 __ bind(done); 9098 %} 9099 ins_pipe(ialu_reg_mem); 9100 %} 9101 9102 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9103 %{ 9104 match(Set dst (XorL (AddL src minus_1) src)); 9105 predicate(UseBMI1Instructions); 9106 effect(KILL cr, TEMP dst); 9107 9108 format %{ "MOVL $dst.hi, 0\n\t" 9109 "BLSMSKL $dst.lo, $src.lo\n\t" 9110 "JNC done\n\t" 9111 "BLSMSKL $dst.hi, $src.hi\n" 9112 "done:" 9113 %} 9114 9115 ins_encode %{ 9116 Label done; 9117 Register Rdst = $dst$$Register; 9118 Register Rsrc = $src$$Register; 9119 __ movl(HIGH_FROM_LOW(Rdst), 0); 9120 __ blsmskl(Rdst, Rsrc); 9121 __ jccb(Assembler::carryClear, done); 9122 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9123 __ bind(done); 9124 %} 9125 9126 ins_pipe(ialu_reg); 9127 %} 9128 9129 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9130 %{ 9131 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 9132 predicate(UseBMI1Instructions); 9133 effect(KILL cr, TEMP dst); 9134 9135 ins_cost(125); 9136 format %{ "MOVL $dst.hi, 0\n\t" 9137 "BLSMSKL $dst.lo, $src\n\t" 9138 "JNC done\n\t" 9139 "BLSMSKL $dst.hi, $src+4\n" 9140 "done:" 9141 %} 9142 9143 ins_encode %{ 9144 Label done; 9145 Register Rdst = $dst$$Register; 9146 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9147 9148 __ movl(HIGH_FROM_LOW(Rdst), 0); 9149 __ blsmskl(Rdst, $src$$Address); 9150 __ jccb(Assembler::carryClear, done); 9151 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 9152 __ bind(done); 9153 %} 9154 9155 ins_pipe(ialu_reg_mem); 9156 %} 9157 9158 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9159 %{ 9160 match(Set dst (AndL (AddL src minus_1) src) ); 9161 predicate(UseBMI1Instructions); 9162 effect(KILL cr, TEMP dst); 9163 9164 format %{ "MOVL $dst.hi, $src.hi\n\t" 
9165 "BLSRL $dst.lo, $src.lo\n\t" 9166 "JNC done\n\t" 9167 "BLSRL $dst.hi, $src.hi\n" 9168 "done:" 9169 %} 9170 9171 ins_encode %{ 9172 Label done; 9173 Register Rdst = $dst$$Register; 9174 Register Rsrc = $src$$Register; 9175 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9176 __ blsrl(Rdst, Rsrc); 9177 __ jccb(Assembler::carryClear, done); 9178 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9179 __ bind(done); 9180 %} 9181 9182 ins_pipe(ialu_reg); 9183 %} 9184 9185 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9186 %{ 9187 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 9188 predicate(UseBMI1Instructions); 9189 effect(KILL cr, TEMP dst); 9190 9191 ins_cost(125); 9192 format %{ "MOVL $dst.hi, $src+4\n\t" 9193 "BLSRL $dst.lo, $src\n\t" 9194 "JNC done\n\t" 9195 "BLSRL $dst.hi, $src+4\n" 9196 "done:" 9197 %} 9198 9199 ins_encode %{ 9200 Label done; 9201 Register Rdst = $dst$$Register; 9202 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9203 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9204 __ blsrl(Rdst, $src$$Address); 9205 __ jccb(Assembler::carryClear, done); 9206 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9207 __ bind(done); 9208 %} 9209 9210 ins_pipe(ialu_reg_mem); 9211 %} 9212 9213 // Or Long Register with Register 9214 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9215 match(Set dst (OrL dst src)); 9216 effect(KILL cr); 9217 format %{ "OR $dst.lo,$src.lo\n\t" 9218 "OR $dst.hi,$src.hi" %} 9219 opcode(0x0B,0x0B); 9220 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9221 ins_pipe( ialu_reg_reg_long ); 9222 %} 9223 9224 // Or Long Register with Immediate 9225 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9226 match(Set dst (OrL dst src)); 9227 effect(KILL cr); 9228 format %{ "OR $dst.lo,$src.lo\n\t" 9229 "OR $dst.hi,$src.hi" %} 9230 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9231 ins_encode( Long_OpcSErm_Lo( dst, src ), 
Long_OpcSErm_Hi( dst, src ) ); 9232 ins_pipe( ialu_reg_long ); 9233 %} 9234 9235 // Or Long Register with Memory 9236 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9237 match(Set dst (OrL dst (LoadL mem))); 9238 effect(KILL cr); 9239 ins_cost(125); 9240 format %{ "OR $dst.lo,$mem\n\t" 9241 "OR $dst.hi,$mem+4" %} 9242 opcode(0x0B,0x0B); 9243 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9244 ins_pipe( ialu_reg_long_mem ); 9245 %} 9246 9247 // Xor Long Register with Register 9248 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9249 match(Set dst (XorL dst src)); 9250 effect(KILL cr); 9251 format %{ "XOR $dst.lo,$src.lo\n\t" 9252 "XOR $dst.hi,$src.hi" %} 9253 opcode(0x33,0x33); 9254 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9255 ins_pipe( ialu_reg_reg_long ); 9256 %} 9257 9258 // Xor Long Register with Immediate -1 9259 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9260 match(Set dst (XorL dst imm)); 9261 format %{ "NOT $dst.lo\n\t" 9262 "NOT $dst.hi" %} 9263 ins_encode %{ 9264 __ notl($dst$$Register); 9265 __ notl(HIGH_FROM_LOW($dst$$Register)); 9266 %} 9267 ins_pipe( ialu_reg_long ); 9268 %} 9269 9270 // Xor Long Register with Immediate 9271 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9272 match(Set dst (XorL dst src)); 9273 effect(KILL cr); 9274 format %{ "XOR $dst.lo,$src.lo\n\t" 9275 "XOR $dst.hi,$src.hi" %} 9276 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9277 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9278 ins_pipe( ialu_reg_long ); 9279 %} 9280 9281 // Xor Long Register with Memory 9282 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9283 match(Set dst (XorL dst (LoadL mem))); 9284 effect(KILL cr); 9285 ins_cost(125); 9286 format %{ "XOR $dst.lo,$mem\n\t" 9287 "XOR $dst.hi,$mem+4" %} 9288 opcode(0x33,0x33); 9289 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9290 ins_pipe( ialu_reg_long_mem ); 
9291 %} 9292 9293 // Shift Left Long by 1 9294 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9295 predicate(UseNewLongLShift); 9296 match(Set dst (LShiftL dst cnt)); 9297 effect(KILL cr); 9298 ins_cost(100); 9299 format %{ "ADD $dst.lo,$dst.lo\n\t" 9300 "ADC $dst.hi,$dst.hi" %} 9301 ins_encode %{ 9302 __ addl($dst$$Register,$dst$$Register); 9303 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9304 %} 9305 ins_pipe( ialu_reg_long ); 9306 %} 9307 9308 // Shift Left Long by 2 9309 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9310 predicate(UseNewLongLShift); 9311 match(Set dst (LShiftL dst cnt)); 9312 effect(KILL cr); 9313 ins_cost(100); 9314 format %{ "ADD $dst.lo,$dst.lo\n\t" 9315 "ADC $dst.hi,$dst.hi\n\t" 9316 "ADD $dst.lo,$dst.lo\n\t" 9317 "ADC $dst.hi,$dst.hi" %} 9318 ins_encode %{ 9319 __ addl($dst$$Register,$dst$$Register); 9320 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9321 __ addl($dst$$Register,$dst$$Register); 9322 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9323 %} 9324 ins_pipe( ialu_reg_long ); 9325 %} 9326 9327 // Shift Left Long by 3 9328 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9329 predicate(UseNewLongLShift); 9330 match(Set dst (LShiftL dst cnt)); 9331 effect(KILL cr); 9332 ins_cost(100); 9333 format %{ "ADD $dst.lo,$dst.lo\n\t" 9334 "ADC $dst.hi,$dst.hi\n\t" 9335 "ADD $dst.lo,$dst.lo\n\t" 9336 "ADC $dst.hi,$dst.hi\n\t" 9337 "ADD $dst.lo,$dst.lo\n\t" 9338 "ADC $dst.hi,$dst.hi" %} 9339 ins_encode %{ 9340 __ addl($dst$$Register,$dst$$Register); 9341 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9342 __ addl($dst$$Register,$dst$$Register); 9343 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9344 __ addl($dst$$Register,$dst$$Register); 9345 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9346 %} 9347 ins_pipe( ialu_reg_long ); 9348 %} 9349 9350 // Shift Left 
Long by 1-31 9351 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9352 match(Set dst (LShiftL dst cnt)); 9353 effect(KILL cr); 9354 ins_cost(200); 9355 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9356 "SHL $dst.lo,$cnt" %} 9357 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9358 ins_encode( move_long_small_shift(dst,cnt) ); 9359 ins_pipe( ialu_reg_long ); 9360 %} 9361 9362 // Shift Left Long by 32-63 9363 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9364 match(Set dst (LShiftL dst cnt)); 9365 effect(KILL cr); 9366 ins_cost(300); 9367 format %{ "MOV $dst.hi,$dst.lo\n" 9368 "\tSHL $dst.hi,$cnt-32\n" 9369 "\tXOR $dst.lo,$dst.lo" %} 9370 opcode(0xC1, 0x4); /* C1 /4 ib */ 9371 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9372 ins_pipe( ialu_reg_long ); 9373 %} 9374 9375 // Shift Left Long by variable 9376 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9377 match(Set dst (LShiftL dst shift)); 9378 effect(KILL cr); 9379 ins_cost(500+200); 9380 size(17); 9381 format %{ "TEST $shift,32\n\t" 9382 "JEQ,s small\n\t" 9383 "MOV $dst.hi,$dst.lo\n\t" 9384 "XOR $dst.lo,$dst.lo\n" 9385 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9386 "SHL $dst.lo,$shift" %} 9387 ins_encode( shift_left_long( dst, shift ) ); 9388 ins_pipe( pipe_slow ); 9389 %} 9390 9391 // Shift Right Long by 1-31 9392 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9393 match(Set dst (URShiftL dst cnt)); 9394 effect(KILL cr); 9395 ins_cost(200); 9396 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9397 "SHR $dst.hi,$cnt" %} 9398 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9399 ins_encode( move_long_small_shift(dst,cnt) ); 9400 ins_pipe( ialu_reg_long ); 9401 %} 9402 9403 // Shift Right Long by 32-63 9404 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9405 match(Set dst (URShiftL dst cnt)); 9406 effect(KILL cr); 9407 ins_cost(300); 9408 format %{ "MOV $dst.lo,$dst.hi\n" 9409 "\tSHR $dst.lo,$cnt-32\n" 9410 "\tXOR 
$dst.hi,$dst.hi" %} 9411 opcode(0xC1, 0x5); /* C1 /5 ib */ 9412 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9413 ins_pipe( ialu_reg_long ); 9414 %} 9415 9416 // Shift Right Long by variable 9417 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9418 match(Set dst (URShiftL dst shift)); 9419 effect(KILL cr); 9420 ins_cost(600); 9421 size(17); 9422 format %{ "TEST $shift,32\n\t" 9423 "JEQ,s small\n\t" 9424 "MOV $dst.lo,$dst.hi\n\t" 9425 "XOR $dst.hi,$dst.hi\n" 9426 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9427 "SHR $dst.hi,$shift" %} 9428 ins_encode( shift_right_long( dst, shift ) ); 9429 ins_pipe( pipe_slow ); 9430 %} 9431 9432 // Shift Right Long by 1-31 9433 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9434 match(Set dst (RShiftL dst cnt)); 9435 effect(KILL cr); 9436 ins_cost(200); 9437 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9438 "SAR $dst.hi,$cnt" %} 9439 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9440 ins_encode( move_long_small_shift(dst,cnt) ); 9441 ins_pipe( ialu_reg_long ); 9442 %} 9443 9444 // Shift Right Long by 32-63 9445 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9446 match(Set dst (RShiftL dst cnt)); 9447 effect(KILL cr); 9448 ins_cost(300); 9449 format %{ "MOV $dst.lo,$dst.hi\n" 9450 "\tSAR $dst.lo,$cnt-32\n" 9451 "\tSAR $dst.hi,31" %} 9452 opcode(0xC1, 0x7); /* C1 /7 ib */ 9453 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9454 ins_pipe( ialu_reg_long ); 9455 %} 9456 9457 // Shift Right arithmetic Long by variable 9458 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9459 match(Set dst (RShiftL dst shift)); 9460 effect(KILL cr); 9461 ins_cost(600); 9462 size(18); 9463 format %{ "TEST $shift,32\n\t" 9464 "JEQ,s small\n\t" 9465 "MOV $dst.lo,$dst.hi\n\t" 9466 "SAR $dst.hi,31\n" 9467 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9468 "SAR $dst.hi,$shift" %} 9469 ins_encode( shift_right_arith_long( dst, shift ) ); 9470 ins_pipe( pipe_slow ); 9471 %} 9472 9473 
9474 //----------Double Instructions------------------------------------------------ 9475 // Double Math 9476 9477 // Compare & branch 9478 9479 // P6 version of float compare, sets condition codes in EFLAGS 9480 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9481 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9482 match(Set cr (CmpD src1 src2)); 9483 effect(KILL rax); 9484 ins_cost(150); 9485 format %{ "FLD $src1\n\t" 9486 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9487 "JNP exit\n\t" 9488 "MOV ah,1 // saw a NaN, set CF\n\t" 9489 "SAHF\n" 9490 "exit:\tNOP // avoid branch to branch" %} 9491 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9492 ins_encode( Push_Reg_DPR(src1), 9493 OpcP, RegOpc(src2), 9494 cmpF_P6_fixup ); 9495 ins_pipe( pipe_slow ); 9496 %} 9497 9498 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9499 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9500 match(Set cr (CmpD src1 src2)); 9501 ins_cost(150); 9502 format %{ "FLD $src1\n\t" 9503 "FUCOMIP ST,$src2 // P6 instruction" %} 9504 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9505 ins_encode( Push_Reg_DPR(src1), 9506 OpcP, RegOpc(src2)); 9507 ins_pipe( pipe_slow ); 9508 %} 9509 9510 // Compare & branch 9511 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9512 predicate(UseSSE<=1); 9513 match(Set cr (CmpD src1 src2)); 9514 effect(KILL rax); 9515 ins_cost(200); 9516 format %{ "FLD $src1\n\t" 9517 "FCOMp $src2\n\t" 9518 "FNSTSW AX\n\t" 9519 "TEST AX,0x400\n\t" 9520 "JZ,s flags\n\t" 9521 "MOV AH,1\t# unordered treat as LT\n" 9522 "flags:\tSAHF" %} 9523 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9524 ins_encode( Push_Reg_DPR(src1), 9525 OpcP, RegOpc(src2), 9526 fpu_flags); 9527 ins_pipe( pipe_slow ); 9528 %} 9529 9530 // Compare vs zero into -1,0,1 9531 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9532 predicate(UseSSE<=1); 9533 match(Set dst (CmpD3 src1 zero)); 9534 effect(KILL 
cr, KILL rax); 9535 ins_cost(280); 9536 format %{ "FTSTD $dst,$src1" %} 9537 opcode(0xE4, 0xD9); 9538 ins_encode( Push_Reg_DPR(src1), 9539 OpcS, OpcP, PopFPU, 9540 CmpF_Result(dst)); 9541 ins_pipe( pipe_slow ); 9542 %} 9543 9544 // Compare into -1,0,1 9545 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9546 predicate(UseSSE<=1); 9547 match(Set dst (CmpD3 src1 src2)); 9548 effect(KILL cr, KILL rax); 9549 ins_cost(300); 9550 format %{ "FCMPD $dst,$src1,$src2" %} 9551 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9552 ins_encode( Push_Reg_DPR(src1), 9553 OpcP, RegOpc(src2), 9554 CmpF_Result(dst)); 9555 ins_pipe( pipe_slow ); 9556 %} 9557 9558 // float compare and set condition codes in EFLAGS by XMM regs 9559 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9560 predicate(UseSSE>=2); 9561 match(Set cr (CmpD src1 src2)); 9562 ins_cost(145); 9563 format %{ "UCOMISD $src1,$src2\n\t" 9564 "JNP,s exit\n\t" 9565 "PUSHF\t# saw NaN, set CF\n\t" 9566 "AND [rsp], #0xffffff2b\n\t" 9567 "POPF\n" 9568 "exit:" %} 9569 ins_encode %{ 9570 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9571 emit_cmpfp_fixup(_masm); 9572 %} 9573 ins_pipe( pipe_slow ); 9574 %} 9575 9576 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9577 predicate(UseSSE>=2); 9578 match(Set cr (CmpD src1 src2)); 9579 ins_cost(100); 9580 format %{ "UCOMISD $src1,$src2" %} 9581 ins_encode %{ 9582 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9583 %} 9584 ins_pipe( pipe_slow ); 9585 %} 9586 9587 // float compare and set condition codes in EFLAGS by XMM regs 9588 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9589 predicate(UseSSE>=2); 9590 match(Set cr (CmpD src1 (LoadD src2))); 9591 ins_cost(145); 9592 format %{ "UCOMISD $src1,$src2\n\t" 9593 "JNP,s exit\n\t" 9594 "PUSHF\t# saw NaN, set CF\n\t" 9595 "AND [rsp], #0xffffff2b\n\t" 9596 "POPF\n" 9597 "exit:" %} 9598 ins_encode %{ 9599 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9600 
emit_cmpfp_fixup(_masm); 9601 %} 9602 ins_pipe( pipe_slow ); 9603 %} 9604 9605 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9606 predicate(UseSSE>=2); 9607 match(Set cr (CmpD src1 (LoadD src2))); 9608 ins_cost(100); 9609 format %{ "UCOMISD $src1,$src2" %} 9610 ins_encode %{ 9611 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9612 %} 9613 ins_pipe( pipe_slow ); 9614 %} 9615 9616 // Compare into -1,0,1 in XMM 9617 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9618 predicate(UseSSE>=2); 9619 match(Set dst (CmpD3 src1 src2)); 9620 effect(KILL cr); 9621 ins_cost(255); 9622 format %{ "UCOMISD $src1, $src2\n\t" 9623 "MOV $dst, #-1\n\t" 9624 "JP,s done\n\t" 9625 "JB,s done\n\t" 9626 "SETNE $dst\n\t" 9627 "MOVZB $dst, $dst\n" 9628 "done:" %} 9629 ins_encode %{ 9630 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9631 emit_cmpfp3(_masm, $dst$$Register); 9632 %} 9633 ins_pipe( pipe_slow ); 9634 %} 9635 9636 // Compare into -1,0,1 in XMM and memory 9637 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9638 predicate(UseSSE>=2); 9639 match(Set dst (CmpD3 src1 (LoadD src2))); 9640 effect(KILL cr); 9641 ins_cost(275); 9642 format %{ "UCOMISD $src1, $src2\n\t" 9643 "MOV $dst, #-1\n\t" 9644 "JP,s done\n\t" 9645 "JB,s done\n\t" 9646 "SETNE $dst\n\t" 9647 "MOVZB $dst, $dst\n" 9648 "done:" %} 9649 ins_encode %{ 9650 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9651 emit_cmpfp3(_masm, $dst$$Register); 9652 %} 9653 ins_pipe( pipe_slow ); 9654 %} 9655 9656 9657 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9658 predicate (UseSSE <=1); 9659 match(Set dst (SubD dst src)); 9660 9661 format %{ "FLD $src\n\t" 9662 "DSUBp $dst,ST" %} 9663 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9664 ins_cost(150); 9665 ins_encode( Push_Reg_DPR(src), 9666 OpcP, RegOpc(dst) ); 9667 ins_pipe( fpu_reg_reg ); 9668 %} 9669 9670 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9671 predicate (UseSSE <=1); 9672 
match(Set dst (RoundDouble (SubD src1 src2))); 9673 ins_cost(250); 9674 9675 format %{ "FLD $src2\n\t" 9676 "DSUB ST,$src1\n\t" 9677 "FSTP_D $dst\t# D-round" %} 9678 opcode(0xD8, 0x5); 9679 ins_encode( Push_Reg_DPR(src2), 9680 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9681 ins_pipe( fpu_mem_reg_reg ); 9682 %} 9683 9684 9685 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9686 predicate (UseSSE <=1); 9687 match(Set dst (SubD dst (LoadD src))); 9688 ins_cost(150); 9689 9690 format %{ "FLD $src\n\t" 9691 "DSUBp $dst,ST" %} 9692 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9693 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9694 OpcP, RegOpc(dst) ); 9695 ins_pipe( fpu_reg_mem ); 9696 %} 9697 9698 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9699 predicate (UseSSE<=1); 9700 match(Set dst (AbsD src)); 9701 ins_cost(100); 9702 format %{ "FABS" %} 9703 opcode(0xE1, 0xD9); 9704 ins_encode( OpcS, OpcP ); 9705 ins_pipe( fpu_reg_reg ); 9706 %} 9707 9708 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9709 predicate(UseSSE<=1); 9710 match(Set dst (NegD src)); 9711 ins_cost(100); 9712 format %{ "FCHS" %} 9713 opcode(0xE0, 0xD9); 9714 ins_encode( OpcS, OpcP ); 9715 ins_pipe( fpu_reg_reg ); 9716 %} 9717 9718 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9719 predicate(UseSSE<=1); 9720 match(Set dst (AddD dst src)); 9721 format %{ "FLD $src\n\t" 9722 "DADD $dst,ST" %} 9723 size(4); 9724 ins_cost(150); 9725 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9726 ins_encode( Push_Reg_DPR(src), 9727 OpcP, RegOpc(dst) ); 9728 ins_pipe( fpu_reg_reg ); 9729 %} 9730 9731 9732 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9733 predicate(UseSSE<=1); 9734 match(Set dst (RoundDouble (AddD src1 src2))); 9735 ins_cost(250); 9736 9737 format %{ "FLD $src2\n\t" 9738 "DADD ST,$src1\n\t" 9739 "FSTP_D $dst\t# D-round" %} 9740 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9741 ins_encode( Push_Reg_DPR(src2), 9742 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9743 ins_pipe( 
fpu_mem_reg_reg ); 9744 %} 9745 9746 9747 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9748 predicate(UseSSE<=1); 9749 match(Set dst (AddD dst (LoadD src))); 9750 ins_cost(150); 9751 9752 format %{ "FLD $src\n\t" 9753 "DADDp $dst,ST" %} 9754 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9755 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9756 OpcP, RegOpc(dst) ); 9757 ins_pipe( fpu_reg_mem ); 9758 %} 9759 9760 // add-to-memory 9761 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9762 predicate(UseSSE<=1); 9763 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9764 ins_cost(150); 9765 9766 format %{ "FLD_D $dst\n\t" 9767 "DADD ST,$src\n\t" 9768 "FST_D $dst" %} 9769 opcode(0xDD, 0x0); 9770 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9771 Opcode(0xD8), RegOpc(src), 9772 set_instruction_start, 9773 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9774 ins_pipe( fpu_reg_mem ); 9775 %} 9776 9777 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9778 predicate(UseSSE<=1); 9779 match(Set dst (AddD dst con)); 9780 ins_cost(125); 9781 format %{ "FLD1\n\t" 9782 "DADDp $dst,ST" %} 9783 ins_encode %{ 9784 __ fld1(); 9785 __ faddp($dst$$reg); 9786 %} 9787 ins_pipe(fpu_reg); 9788 %} 9789 9790 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9791 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9792 match(Set dst (AddD dst con)); 9793 ins_cost(200); 9794 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9795 "DADDp $dst,ST" %} 9796 ins_encode %{ 9797 __ fld_d($constantaddress($con)); 9798 __ faddp($dst$$reg); 9799 %} 9800 ins_pipe(fpu_reg_mem); 9801 %} 9802 9803 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9804 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9805 match(Set dst (RoundDouble (AddD src con))); 9806 ins_cost(200); 9807 format %{ "FLD_D [$constantaddress]\t# load from 
constant table: double=$con\n\t" 9808 "DADD ST,$src\n\t" 9809 "FSTP_D $dst\t# D-round" %} 9810 ins_encode %{ 9811 __ fld_d($constantaddress($con)); 9812 __ fadd($src$$reg); 9813 __ fstp_d(Address(rsp, $dst$$disp)); 9814 %} 9815 ins_pipe(fpu_mem_reg_con); 9816 %} 9817 9818 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9819 predicate(UseSSE<=1); 9820 match(Set dst (MulD dst src)); 9821 format %{ "FLD $src\n\t" 9822 "DMULp $dst,ST" %} 9823 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9824 ins_cost(150); 9825 ins_encode( Push_Reg_DPR(src), 9826 OpcP, RegOpc(dst) ); 9827 ins_pipe( fpu_reg_reg ); 9828 %} 9829 9830 // Strict FP instruction biases argument before multiply then 9831 // biases result to avoid double rounding of subnormals. 9832 // 9833 // scale arg1 by multiplying arg1 by 2^(-15360) 9834 // load arg2 9835 // multiply scaled arg1 by arg2 9836 // rescale product by 2^(15360) 9837 // 9838 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9839 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9840 match(Set dst (MulD dst src)); 9841 ins_cost(1); // Select this instruction for all strict FP double multiplies 9842 9843 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9844 "DMULp $dst,ST\n\t" 9845 "FLD $src\n\t" 9846 "DMULp $dst,ST\n\t" 9847 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9848 "DMULp $dst,ST\n\t" %} 9849 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9850 ins_encode( strictfp_bias1(dst), 9851 Push_Reg_DPR(src), 9852 OpcP, RegOpc(dst), 9853 strictfp_bias2(dst) ); 9854 ins_pipe( fpu_reg_reg ); 9855 %} 9856 9857 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9858 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9859 match(Set dst (MulD dst con)); 9860 ins_cost(200); 9861 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9862 "DMULp $dst,ST" %} 9863 ins_encode %{ 9864 __ fld_d($constantaddress($con)); 9865 __ 
fmulp($dst$$reg); 9866 %} 9867 ins_pipe(fpu_reg_mem); 9868 %} 9869 9870 9871 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9872 predicate( UseSSE<=1 ); 9873 match(Set dst (MulD dst (LoadD src))); 9874 ins_cost(200); 9875 format %{ "FLD_D $src\n\t" 9876 "DMULp $dst,ST" %} 9877 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9878 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9879 OpcP, RegOpc(dst) ); 9880 ins_pipe( fpu_reg_mem ); 9881 %} 9882 9883 // 9884 // Cisc-alternate to reg-reg multiply 9885 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 9886 predicate( UseSSE<=1 ); 9887 match(Set dst (MulD src (LoadD mem))); 9888 ins_cost(250); 9889 format %{ "FLD_D $mem\n\t" 9890 "DMUL ST,$src\n\t" 9891 "FSTP_D $dst" %} 9892 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9893 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9894 OpcReg_FPR(src), 9895 Pop_Reg_DPR(dst) ); 9896 ins_pipe( fpu_reg_reg_mem ); 9897 %} 9898 9899 9900 // MACRO3 -- addDPR a mulDPR 9901 // This instruction is a '2-address' instruction in that the result goes 9902 // back to src2. This eliminates a move from the macro; possibly the 9903 // register allocator will have to add it back (and maybe not). 
9904 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9905 predicate( UseSSE<=1 ); 9906 match(Set src2 (AddD (MulD src0 src1) src2)); 9907 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9908 "DMUL ST,$src1\n\t" 9909 "DADDp $src2,ST" %} 9910 ins_cost(250); 9911 opcode(0xDD); /* LoadD DD /0 */ 9912 ins_encode( Push_Reg_FPR(src0), 9913 FMul_ST_reg(src1), 9914 FAddP_reg_ST(src2) ); 9915 ins_pipe( fpu_reg_reg_reg ); 9916 %} 9917 9918 9919 // MACRO3 -- subDPR a mulDPR 9920 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9921 predicate( UseSSE<=1 ); 9922 match(Set src2 (SubD (MulD src0 src1) src2)); 9923 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9924 "DMUL ST,$src1\n\t" 9925 "DSUBRp $src2,ST" %} 9926 ins_cost(250); 9927 ins_encode( Push_Reg_FPR(src0), 9928 FMul_ST_reg(src1), 9929 Opcode(0xDE), Opc_plus(0xE0,src2)); 9930 ins_pipe( fpu_reg_reg_reg ); 9931 %} 9932 9933 9934 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9935 predicate( UseSSE<=1 ); 9936 match(Set dst (DivD dst src)); 9937 9938 format %{ "FLD $src\n\t" 9939 "FDIVp $dst,ST" %} 9940 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9941 ins_cost(150); 9942 ins_encode( Push_Reg_DPR(src), 9943 OpcP, RegOpc(dst) ); 9944 ins_pipe( fpu_reg_reg ); 9945 %} 9946 9947 // Strict FP instruction biases argument before division then 9948 // biases result, to avoid double rounding of subnormals. 
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Strict FP double divide (x87 path). Mirrors strictfp_mulDPR_reg above:
  // bias the dividend, divide, then un-bias the quotient to avoid double
  // rounding of subnormals.
  // FIX: removed a duplicate bare "predicate(UseSSE<=1);" that preceded
  // match() -- an instruct carries exactly one predicate, and the combined
  // strict-FP predicate below already subsumes the UseSSE<=1 condition.
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(01); // force selection of this form for all strict FP double divides

  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "FDIVp  $dst,ST\n\t"
            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  // Non-strict double divide with an explicit store-round to a stack slot.
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD    $src1\n\t"
            "FDIV   ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  // x87 double remainder via FPREM loop in the encoding helpers.
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

format %{ "SUB ESP,8\t # DMOD\n" 10009 "\tMOVSD [ESP+0],$src1\n" 10010 "\tFLD_D [ESP+0]\n" 10011 "\tMOVSD [ESP+0],$src0\n" 10012 "\tFLD_D [ESP+0]\n" 10013 "loop:\tFPREM\n" 10014 "\tFWAIT\n" 10015 "\tFNSTSW AX\n" 10016 "\tSAHF\n" 10017 "\tJP loop\n" 10018 "\tFSTP_D [ESP+0]\n" 10019 "\tMOVSD $dst,[ESP+0]\n" 10020 "\tADD ESP,8\n" 10021 "\tFSTP ST0\t # Restore FPU Stack" 10022 %} 10023 ins_cost(250); 10024 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 10025 ins_pipe( pipe_slow ); 10026 %} 10027 10028 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 10029 predicate (UseSSE<=1); 10030 match(Set dst(AtanD dst src)); 10031 format %{ "DATA $dst,$src" %} 10032 opcode(0xD9, 0xF3); 10033 ins_encode( Push_Reg_DPR(src), 10034 OpcP, OpcS, RegOpc(dst) ); 10035 ins_pipe( pipe_slow ); 10036 %} 10037 10038 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 10039 predicate (UseSSE>=2); 10040 match(Set dst(AtanD dst src)); 10041 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 10042 format %{ "DATA $dst,$src" %} 10043 opcode(0xD9, 0xF3); 10044 ins_encode( Push_SrcD(src), 10045 OpcP, OpcS, Push_ResultD(dst) ); 10046 ins_pipe( pipe_slow ); 10047 %} 10048 10049 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 10050 predicate (UseSSE<=1); 10051 match(Set dst (SqrtD src)); 10052 format %{ "DSQRT $dst,$src" %} 10053 opcode(0xFA, 0xD9); 10054 ins_encode( Push_Reg_DPR(src), 10055 OpcS, OpcP, Pop_Reg_DPR(dst) ); 10056 ins_pipe( pipe_slow ); 10057 %} 10058 10059 //-------------Float Instructions------------------------------- 10060 // Float Math 10061 10062 // Code for float compare: 10063 // fcompp(); 10064 // fwait(); fnstsw_ax(); 10065 // sahf(); 10066 // movl(dst, unordered_result); 10067 // jcc(Assembler::parity, exit); 10068 // movl(dst, less_result); 10069 // jcc(Assembler::below, exit); 10070 // movl(dst, equal_result); 10071 // jcc(Assembler::equal, exit); 10072 // movl(dst, greater_result); 10073 // exit: 10074 10075 // P6 
version of float compare, sets condition codes in EFLAGS 10076 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10077 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10078 match(Set cr (CmpF src1 src2)); 10079 effect(KILL rax); 10080 ins_cost(150); 10081 format %{ "FLD $src1\n\t" 10082 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10083 "JNP exit\n\t" 10084 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10085 "SAHF\n" 10086 "exit:\tNOP // avoid branch to branch" %} 10087 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10088 ins_encode( Push_Reg_DPR(src1), 10089 OpcP, RegOpc(src2), 10090 cmpF_P6_fixup ); 10091 ins_pipe( pipe_slow ); 10092 %} 10093 10094 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10095 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10096 match(Set cr (CmpF src1 src2)); 10097 ins_cost(100); 10098 format %{ "FLD $src1\n\t" 10099 "FUCOMIP ST,$src2 // P6 instruction" %} 10100 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10101 ins_encode( Push_Reg_DPR(src1), 10102 OpcP, RegOpc(src2)); 10103 ins_pipe( pipe_slow ); 10104 %} 10105 10106 10107 // Compare & branch 10108 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10109 predicate(UseSSE == 0); 10110 match(Set cr (CmpF src1 src2)); 10111 effect(KILL rax); 10112 ins_cost(200); 10113 format %{ "FLD $src1\n\t" 10114 "FCOMp $src2\n\t" 10115 "FNSTSW AX\n\t" 10116 "TEST AX,0x400\n\t" 10117 "JZ,s flags\n\t" 10118 "MOV AH,1\t# unordered treat as LT\n" 10119 "flags:\tSAHF" %} 10120 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10121 ins_encode( Push_Reg_DPR(src1), 10122 OpcP, RegOpc(src2), 10123 fpu_flags); 10124 ins_pipe( pipe_slow ); 10125 %} 10126 10127 // Compare vs zero into -1,0,1 10128 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10129 predicate(UseSSE == 0); 10130 match(Set dst (CmpF3 src1 zero)); 10131 effect(KILL cr, KILL rax); 10132 ins_cost(280); 10133 format %{ "FTSTF 
$dst,$src1" %} 10134 opcode(0xE4, 0xD9); 10135 ins_encode( Push_Reg_DPR(src1), 10136 OpcS, OpcP, PopFPU, 10137 CmpF_Result(dst)); 10138 ins_pipe( pipe_slow ); 10139 %} 10140 10141 // Compare into -1,0,1 10142 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10143 predicate(UseSSE == 0); 10144 match(Set dst (CmpF3 src1 src2)); 10145 effect(KILL cr, KILL rax); 10146 ins_cost(300); 10147 format %{ "FCMPF $dst,$src1,$src2" %} 10148 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10149 ins_encode( Push_Reg_DPR(src1), 10150 OpcP, RegOpc(src2), 10151 CmpF_Result(dst)); 10152 ins_pipe( pipe_slow ); 10153 %} 10154 10155 // float compare and set condition codes in EFLAGS by XMM regs 10156 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10157 predicate(UseSSE>=1); 10158 match(Set cr (CmpF src1 src2)); 10159 ins_cost(145); 10160 format %{ "UCOMISS $src1,$src2\n\t" 10161 "JNP,s exit\n\t" 10162 "PUSHF\t# saw NaN, set CF\n\t" 10163 "AND [rsp], #0xffffff2b\n\t" 10164 "POPF\n" 10165 "exit:" %} 10166 ins_encode %{ 10167 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10168 emit_cmpfp_fixup(_masm); 10169 %} 10170 ins_pipe( pipe_slow ); 10171 %} 10172 10173 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10174 predicate(UseSSE>=1); 10175 match(Set cr (CmpF src1 src2)); 10176 ins_cost(100); 10177 format %{ "UCOMISS $src1,$src2" %} 10178 ins_encode %{ 10179 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10180 %} 10181 ins_pipe( pipe_slow ); 10182 %} 10183 10184 // float compare and set condition codes in EFLAGS by XMM regs 10185 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10186 predicate(UseSSE>=1); 10187 match(Set cr (CmpF src1 (LoadF src2))); 10188 ins_cost(165); 10189 format %{ "UCOMISS $src1,$src2\n\t" 10190 "JNP,s exit\n\t" 10191 "PUSHF\t# saw NaN, set CF\n\t" 10192 "AND [rsp], #0xffffff2b\n\t" 10193 "POPF\n" 10194 "exit:" %} 10195 ins_encode %{ 10196 __ ucomiss($src1$$XMMRegister, $src2$$Address); 
10197 emit_cmpfp_fixup(_masm); 10198 %} 10199 ins_pipe( pipe_slow ); 10200 %} 10201 10202 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10203 predicate(UseSSE>=1); 10204 match(Set cr (CmpF src1 (LoadF src2))); 10205 ins_cost(100); 10206 format %{ "UCOMISS $src1,$src2" %} 10207 ins_encode %{ 10208 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10209 %} 10210 ins_pipe( pipe_slow ); 10211 %} 10212 10213 // Compare into -1,0,1 in XMM 10214 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10215 predicate(UseSSE>=1); 10216 match(Set dst (CmpF3 src1 src2)); 10217 effect(KILL cr); 10218 ins_cost(255); 10219 format %{ "UCOMISS $src1, $src2\n\t" 10220 "MOV $dst, #-1\n\t" 10221 "JP,s done\n\t" 10222 "JB,s done\n\t" 10223 "SETNE $dst\n\t" 10224 "MOVZB $dst, $dst\n" 10225 "done:" %} 10226 ins_encode %{ 10227 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10228 emit_cmpfp3(_masm, $dst$$Register); 10229 %} 10230 ins_pipe( pipe_slow ); 10231 %} 10232 10233 // Compare into -1,0,1 in XMM and memory 10234 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10235 predicate(UseSSE>=1); 10236 match(Set dst (CmpF3 src1 (LoadF src2))); 10237 effect(KILL cr); 10238 ins_cost(275); 10239 format %{ "UCOMISS $src1, $src2\n\t" 10240 "MOV $dst, #-1\n\t" 10241 "JP,s done\n\t" 10242 "JB,s done\n\t" 10243 "SETNE $dst\n\t" 10244 "MOVZB $dst, $dst\n" 10245 "done:" %} 10246 ins_encode %{ 10247 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10248 emit_cmpfp3(_masm, $dst$$Register); 10249 %} 10250 ins_pipe( pipe_slow ); 10251 %} 10252 10253 // Spill to obtain 24-bit precision 10254 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10255 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10256 match(Set dst (SubF src1 src2)); 10257 10258 format %{ "FSUB $dst,$src1 - $src2" %} 10259 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10260 ins_encode( Push_Reg_FPR(src1), 10261 
OpcReg_FPR(src2), 10262 Pop_Mem_FPR(dst) ); 10263 ins_pipe( fpu_mem_reg_reg ); 10264 %} 10265 // 10266 // This instruction does not round to 24-bits 10267 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10268 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10269 match(Set dst (SubF dst src)); 10270 10271 format %{ "FSUB $dst,$src" %} 10272 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10273 ins_encode( Push_Reg_FPR(src), 10274 OpcP, RegOpc(dst) ); 10275 ins_pipe( fpu_reg_reg ); 10276 %} 10277 10278 // Spill to obtain 24-bit precision 10279 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10280 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10281 match(Set dst (AddF src1 src2)); 10282 10283 format %{ "FADD $dst,$src1,$src2" %} 10284 opcode(0xD8, 0x0); /* D8 C0+i */ 10285 ins_encode( Push_Reg_FPR(src2), 10286 OpcReg_FPR(src1), 10287 Pop_Mem_FPR(dst) ); 10288 ins_pipe( fpu_mem_reg_reg ); 10289 %} 10290 // 10291 // This instruction does not round to 24-bits 10292 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10293 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10294 match(Set dst (AddF dst src)); 10295 10296 format %{ "FLD $src\n\t" 10297 "FADDp $dst,ST" %} 10298 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10299 ins_encode( Push_Reg_FPR(src), 10300 OpcP, RegOpc(dst) ); 10301 ins_pipe( fpu_reg_reg ); 10302 %} 10303 10304 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10305 predicate(UseSSE==0); 10306 match(Set dst (AbsF src)); 10307 ins_cost(100); 10308 format %{ "FABS" %} 10309 opcode(0xE1, 0xD9); 10310 ins_encode( OpcS, OpcP ); 10311 ins_pipe( fpu_reg_reg ); 10312 %} 10313 10314 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10315 predicate(UseSSE==0); 10316 match(Set dst (NegF src)); 10317 ins_cost(100); 10318 format %{ "FCHS" %} 10319 opcode(0xE0, 0xD9); 10320 ins_encode( OpcS, OpcP ); 10321 ins_pipe( fpu_reg_reg ); 10322 %} 10323 10324 // Cisc-alternate to addFPR_reg 10325 // Spill to 
obtain 24-bit precision 10326 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10327 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10328 match(Set dst (AddF src1 (LoadF src2))); 10329 10330 format %{ "FLD $src2\n\t" 10331 "FADD ST,$src1\n\t" 10332 "FSTP_S $dst" %} 10333 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10334 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10335 OpcReg_FPR(src1), 10336 Pop_Mem_FPR(dst) ); 10337 ins_pipe( fpu_mem_reg_mem ); 10338 %} 10339 // 10340 // Cisc-alternate to addFPR_reg 10341 // This instruction does not round to 24-bits 10342 instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10343 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10344 match(Set dst (AddF dst (LoadF src))); 10345 10346 format %{ "FADD $dst,$src" %} 10347 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10348 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10349 OpcP, RegOpc(dst) ); 10350 ins_pipe( fpu_reg_mem ); 10351 %} 10352 10353 // // Following two instructions for _222_mpegaudio 10354 // Spill to obtain 24-bit precision 10355 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10356 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10357 match(Set dst (AddF src1 src2)); 10358 10359 format %{ "FADD $dst,$src1,$src2" %} 10360 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10361 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10362 OpcReg_FPR(src2), 10363 Pop_Mem_FPR(dst) ); 10364 ins_pipe( fpu_mem_reg_mem ); 10365 %} 10366 10367 // Cisc-spill variant 10368 // Spill to obtain 24-bit precision 10369 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10370 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10371 match(Set dst (AddF src1 (LoadF src2))); 10372 10373 format %{ "FADD $dst,$src1,$src2 cisc" %} 10374 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10375 
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10376 set_instruction_start, 10377 OpcP, RMopc_Mem(secondary,src1), 10378 Pop_Mem_FPR(dst) ); 10379 ins_pipe( fpu_mem_mem_mem ); 10380 %} 10381 10382 // Spill to obtain 24-bit precision 10383 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10384 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10385 match(Set dst (AddF src1 src2)); 10386 10387 format %{ "FADD $dst,$src1,$src2" %} 10388 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10389 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10390 set_instruction_start, 10391 OpcP, RMopc_Mem(secondary,src1), 10392 Pop_Mem_FPR(dst) ); 10393 ins_pipe( fpu_mem_mem_mem ); 10394 %} 10395 10396 10397 // Spill to obtain 24-bit precision 10398 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10399 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10400 match(Set dst (AddF src con)); 10401 format %{ "FLD $src\n\t" 10402 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10403 "FSTP_S $dst" %} 10404 ins_encode %{ 10405 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10406 __ fadd_s($constantaddress($con)); 10407 __ fstp_s(Address(rsp, $dst$$disp)); 10408 %} 10409 ins_pipe(fpu_mem_reg_con); 10410 %} 10411 // 10412 // This instruction does not round to 24-bits 10413 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10414 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10415 match(Set dst (AddF src con)); 10416 format %{ "FLD $src\n\t" 10417 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10418 "FSTP $dst" %} 10419 ins_encode %{ 10420 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10421 __ fadd_s($constantaddress($con)); 10422 __ fstp_d($dst$$reg); 10423 %} 10424 ins_pipe(fpu_reg_reg_con); 10425 %} 10426 10427 // Spill to obtain 24-bit precision 10428 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10429 
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10430 match(Set dst (MulF src1 src2)); 10431 10432 format %{ "FLD $src1\n\t" 10433 "FMUL $src2\n\t" 10434 "FSTP_S $dst" %} 10435 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10436 ins_encode( Push_Reg_FPR(src1), 10437 OpcReg_FPR(src2), 10438 Pop_Mem_FPR(dst) ); 10439 ins_pipe( fpu_mem_reg_reg ); 10440 %} 10441 // 10442 // This instruction does not round to 24-bits 10443 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10444 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10445 match(Set dst (MulF src1 src2)); 10446 10447 format %{ "FLD $src1\n\t" 10448 "FMUL $src2\n\t" 10449 "FSTP_S $dst" %} 10450 opcode(0xD8, 0x1); /* D8 C8+i */ 10451 ins_encode( Push_Reg_FPR(src2), 10452 OpcReg_FPR(src1), 10453 Pop_Reg_FPR(dst) ); 10454 ins_pipe( fpu_reg_reg_reg ); 10455 %} 10456 10457 10458 // Spill to obtain 24-bit precision 10459 // Cisc-alternate to reg-reg multiply 10460 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10461 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10462 match(Set dst (MulF src1 (LoadF src2))); 10463 10464 format %{ "FLD_S $src2\n\t" 10465 "FMUL $src1\n\t" 10466 "FSTP_S $dst" %} 10467 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10468 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10469 OpcReg_FPR(src1), 10470 Pop_Mem_FPR(dst) ); 10471 ins_pipe( fpu_mem_reg_mem ); 10472 %} 10473 // 10474 // This instruction does not round to 24-bits 10475 // Cisc-alternate to reg-reg multiply 10476 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10477 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10478 match(Set dst (MulF src1 (LoadF src2))); 10479 10480 format %{ "FMUL $dst,$src1,$src2" %} 10481 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10482 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10483 OpcReg_FPR(src1), 10484 
Pop_Reg_FPR(dst) ); 10485 ins_pipe( fpu_reg_reg_mem ); 10486 %} 10487 10488 // Spill to obtain 24-bit precision 10489 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10490 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10491 match(Set dst (MulF src1 src2)); 10492 10493 format %{ "FMUL $dst,$src1,$src2" %} 10494 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10495 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10496 set_instruction_start, 10497 OpcP, RMopc_Mem(secondary,src1), 10498 Pop_Mem_FPR(dst) ); 10499 ins_pipe( fpu_mem_mem_mem ); 10500 %} 10501 10502 // Spill to obtain 24-bit precision 10503 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10504 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10505 match(Set dst (MulF src con)); 10506 10507 format %{ "FLD $src\n\t" 10508 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10509 "FSTP_S $dst" %} 10510 ins_encode %{ 10511 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10512 __ fmul_s($constantaddress($con)); 10513 __ fstp_s(Address(rsp, $dst$$disp)); 10514 %} 10515 ins_pipe(fpu_mem_reg_con); 10516 %} 10517 // 10518 // This instruction does not round to 24-bits 10519 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10520 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10521 match(Set dst (MulF src con)); 10522 10523 format %{ "FLD $src\n\t" 10524 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10525 "FSTP $dst" %} 10526 ins_encode %{ 10527 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10528 __ fmul_s($constantaddress($con)); 10529 __ fstp_d($dst$$reg); 10530 %} 10531 ins_pipe(fpu_reg_reg_con); 10532 %} 10533 10534 10535 // 10536 // MACRO1 -- subsume unshared load into mulFPR 10537 // This instruction does not round to 24-bits 10538 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10539 predicate(UseSSE==0 && 
!Compile::current()->select_24_bit_instr()); 10540 match(Set dst (MulF (LoadF mem1) src)); 10541 10542 format %{ "FLD $mem1 ===MACRO1===\n\t" 10543 "FMUL ST,$src\n\t" 10544 "FSTP $dst" %} 10545 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10546 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10547 OpcReg_FPR(src), 10548 Pop_Reg_FPR(dst) ); 10549 ins_pipe( fpu_reg_reg_mem ); 10550 %} 10551 // 10552 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10553 // This instruction does not round to 24-bits 10554 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10555 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10556 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10557 ins_cost(95); 10558 10559 format %{ "FLD $mem1 ===MACRO2===\n\t" 10560 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10561 "FADD ST,$src2\n\t" 10562 "FSTP $dst" %} 10563 opcode(0xD9); /* LoadF D9 /0 */ 10564 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10565 FMul_ST_reg(src1), 10566 FAdd_ST_reg(src2), 10567 Pop_Reg_FPR(dst) ); 10568 ins_pipe( fpu_reg_mem_reg_reg ); 10569 %} 10570 10571 // MACRO3 -- addFPR a mulFPR 10572 // This instruction does not round to 24-bits. It is a '2-address' 10573 // instruction in that the result goes back to src2. This eliminates 10574 // a move from the macro; possibly the register allocator will have 10575 // to add it back (and maybe not). 
// Fused multiply-add; the result is written back into src2 ('2-address'
// form, see the MACRO3 comment above this instruct in the file).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Float remainder (ModF) variants.  All reuse the double-remainder
// helper emitModDPR(), which clobbers EAX and EFLAGS.

// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Float remainder for XMM operands: operands are bounced through the
// stack to the x87 FPU (FPREM loop), result moved back to the XMM reg.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted.  Please keep it that way!

// Round an x87 float to memory (single-precision store performs the round).
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round an x87 double to memory (double-precision store performs the round).
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  // Expanded into the single-precision store above, which does the round.
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is not already at top-of-stack (FPR1), load it first so the
    // single-precision store can round it; otherwise store TOS directly.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Float -> double, pure x87 register move; widening needs no rounding.
// NOTE(review): the format mnemonic says FST_S although this is an
// F->D reg-reg move -- cosmetic (debug-format text) only.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  // Expanded into the double-precision store above.
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// XMM float -> x87 double: bounce the value through the stack.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
// x87 double -> int: truncating FISTP, then check for the 0x80000000
// sentinel and fall into d2i_wrapper for NaN/overflow corner cases.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    // CVTTSD2SI produces 0x80000000 for NaN and out-of-range inputs;
    // only then take the slow wrapper call.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 double -> long; slow path through d2l_wrapper on the
// 0x8000000000000000 sentinel (EDX:EAX == 0x80000000:0).
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // Move the XMM value to the x87 stack, then FISTP in truncate mode.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 sentinel => NaN/overflow; take the wrapper call.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
// XMM float -> int: CVTTSS2SI, with the 0x80000000 sentinel routed to
// the (shared double) d2i_wrapper for NaN/overflow.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    // CVTTSS2SI produces 0x80000000 for NaN and out-of-range inputs.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 float -> long; sentinel check and d2l_wrapper slow path as for doubles.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
// Float -> long with SSE: the value is spilled from xmm to the stack, loaded
// onto the x87 stack, and converted with FISTP in truncation mode (32-bit SSE
// has no cvttss2si->64-bit form). Result in EDX:EAX; the sentinel value
// 0x8000000000000000 (EDX==0x80000000, EAX==0) routes to d2l_wrapper.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: re-present the float on the FPU stack for d2l_wrapper.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Int -> double on the x87 stack (UseSSE<=1): FILD from the int's stack slot.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Int -> double in xmm via CVTSI2SD (UseSSE>=2, scalar-convert flavor).
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Int -> double converting straight from memory (folds the LoadI).
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Int -> double staying in the xmm domain (UseXmmI2D): MOVD then packed
// CVTDQ2PD, avoiding a GPR->xmm domain-crossing convert.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Int-from-memory -> double on the x87 stack; only legal outside 24-bit
// precision mode (no rounding store is performed).
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// Byte-range int (x & 255) -> float: value always fits exactly in 24-bit
// precision, so no rounding store is needed even in 24-bit mode.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Int -> float staying in the xmm domain (UseXmmI2F): MOVD then CVTDQ2PS.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy to both halves, then arithmetic-shift the
// high half to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long -> double, x87 result (UseSSE<=1): push both halves and FILD the
// 64-bit integer from the stack; FSTP_D performs the D-round.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Long -> double into xmm (UseSSE>=2): FILD/FSTP via the stack, then MOVSD
// the rounded result into the xmm register.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Long -> float into xmm (UseSSE>=1): same stack dance with a single-
// precision store performing the F-round.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Long -> float to a stack slot (x87 fallback, no UseSSE predicate).
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long -> int is just a copy of the low half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Raw bit-move float stack slot -> int register (no conversion).
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Raw bit-move x87 float register -> int stack slot.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Raw bit-move xmm float -> int stack slot.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit-move xmm float -> int register via MOVD (cheapest form, UseSSE>=2).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit-move int register -> float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// Raw bit-move int stack slot -> x87 float register (FLD_S reinterprets bits).
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);  /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Raw bit-move int stack slot -> xmm float.
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit-move int register -> xmm float via MOVD (UseSSE>=2).
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit-move double stack slot -> long register pair (two 32-bit loads).
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// Raw bit-move x87 double register -> long stack slot.
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Raw bit-move xmm double -> long stack slot.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit-move xmm double -> long register pair: MOVD the low word, shuffle
// the high word down with PSHUFLW, then MOVD it into the high register.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit-move long register pair -> double stack slot (two 32-bit stores).
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// Raw bit-move long stack slot -> x87 double register.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);  /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// Raw bit-move long stack slot -> xmm double, full-register load (MOVSD
// clears the upper half, avoiding a partial-register stall).
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Same move when UseXmmLoadAndClearUpper is off (MOVLPD form in the format).
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit-move long register pair -> xmm double: MOVD both halves into xmm
// registers and interleave them with PUNPCKLDQ.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
// Small-array variant: clear_mem(..., false). The $$template format picks
// the asm listing matching the active UseFastStosb/UseXMMForObjInit flags.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

// Large-array variant: clear_mem(..., true); skips the short-length fast path.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, both operands byte[] (Latin-1).
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, both operands char[] (UTF-16).
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, Latin-1 vs UTF-16.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, UTF-16 vs Latin-1. Note the operand registers are swapped
// relative to the LU case and the arguments to string_compare are exchanged,
// reusing the LU stub with the operands reversed.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // is_array_equ=false: operands are string bodies, not whole arrays.
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}

  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// Constant-size substring search, UTF-16 pattern in Latin-1 text.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Variable-size substring search, byte[] (Latin-1); -1 means count unknown
// at compile time.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// Variable-size substring search, char[] (UTF-16).
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Variable-size substring search, mixed UL encoding.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Search for a single char in a char[].
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // is_array_equ=true: lengths/headers are handled by the stub.
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// Array equals for char[] (element size 2; note is_char=true below).
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// Test whether a byte[] contains any negative bytes (non-ASCII).
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __
char_array_compress($src$$Register, $dst$$Register, $len$$Register, 11906 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11907 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11908 %} 11909 ins_pipe( pipe_slow ); 11910 %} 11911 11912 // fast byte[] to char[] inflation 11913 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 11914 regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ 11915 match(Set dummy (StrInflatedCopy src (Binary dst len))); 11916 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 11917 11918 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 11919 ins_encode %{ 11920 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, 11921 $tmp1$$XMMRegister, $tmp2$$Register); 11922 %} 11923 ins_pipe( pipe_slow ); 11924 %} 11925 11926 // encode char[] to byte[] in ISO_8859_1 11927 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 11928 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11929 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11930 match(Set result (EncodeISOArray src (Binary dst len))); 11931 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11932 11933 format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 11934 ins_encode %{ 11935 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 11936 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11937 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11938 %} 11939 ins_pipe( pipe_slow ); 11940 %} 11941 11942 11943 //----------Control Flow Instructions------------------------------------------ 11944 // Signed compare Instructions 11945 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ 11946 match(Set cr (CmpI op1 op2)); 11947 effect( DEF cr, USE op1, USE op2 ); 11948 format %{ "CMP $op1,$op2" %} 11949 opcode(0x3B); /* Opcode 3B /r */ 11950 
ins_encode( OpcP, RegReg( op1, op2) ); 11951 ins_pipe( ialu_cr_reg_reg ); 11952 %} 11953 11954 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 11955 match(Set cr (CmpI op1 op2)); 11956 effect( DEF cr, USE op1 ); 11957 format %{ "CMP $op1,$op2" %} 11958 opcode(0x81,0x07); /* Opcode 81 /7 */ 11959 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 11960 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11961 ins_pipe( ialu_cr_reg_imm ); 11962 %} 11963 11964 // Cisc-spilled version of cmpI_eReg 11965 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 11966 match(Set cr (CmpI op1 (LoadI op2))); 11967 11968 format %{ "CMP $op1,$op2" %} 11969 ins_cost(500); 11970 opcode(0x3B); /* Opcode 3B /r */ 11971 ins_encode( OpcP, RegMem( op1, op2) ); 11972 ins_pipe( ialu_cr_reg_mem ); 11973 %} 11974 11975 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{ 11976 match(Set cr (CmpI src zero)); 11977 effect( DEF cr, USE src ); 11978 11979 format %{ "TEST $src,$src" %} 11980 opcode(0x85); 11981 ins_encode( OpcP, RegReg( src, src ) ); 11982 ins_pipe( ialu_cr_reg_imm ); 11983 %} 11984 11985 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{ 11986 match(Set cr (CmpI (AndI src con) zero)); 11987 11988 format %{ "TEST $src,$con" %} 11989 opcode(0xF7,0x00); 11990 ins_encode( OpcP, RegOpc(src), Con32(con) ); 11991 ins_pipe( ialu_cr_reg_imm ); 11992 %} 11993 11994 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{ 11995 match(Set cr (CmpI (AndI src mem) zero)); 11996 11997 format %{ "TEST $src,$mem" %} 11998 opcode(0x85); 11999 ins_encode( OpcP, RegMem( src, mem ) ); 12000 ins_pipe( ialu_cr_reg_mem ); 12001 %} 12002 12003 // Unsigned compare Instructions; really, same as signed except they 12004 // produce an eFlagsRegU instead of eFlagsReg. 
// Register-register unsigned compare; identical encoding to the signed
// version, but produces an eFlagsRegU so only unsigned branches consume it.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Register-immediate unsigned compare.
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare against zero via TEST.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Pointer compared against a pointer constant.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  // Restrict to pointers that need no relocation (raw pointers).
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// Test a pointer loaded from memory against null: TEST mem,0xFFFFFFFF
// (an AND that discards its result) sets ZF iff the loaded pointer is null.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
// Branch-based min; encoding supplied by the min_enc encoding class.
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
// Branch-based max; encoding supplied by the max_enc encoding class.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
//
// Computes  limit = init + stride * ((limit - init + stride - 1) / stride)
// in 64-bit precision using the EAX:EDX register pair (CDQ/IDIV/MUL all
// implicitly use EAX:EDX, hence the fixed register constraints).
// Strides of +/-1 are excluded by the assert; those cases never reach here.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      // Negative stride: add (stride + 1), then negate the 64-bit value so
      // the division below is by the positive stride magnitude.
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
// Indirect jump through a table of code addresses stored in the constant
// section, indexed by switch_val (already scaled to a byte offset).
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Carry-flag variant (cheaper flag materialization, hence lower cost).
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Loops that set a vector mask restore it on exit (restorevectmask).
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Carry-flag variant with vector mask restore.
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Carry-flag variant of the unsigned conditional jump.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Float-compare conditional jump that must also consult the parity flag
// (PF set means the FP compare was unordered, i.e. a NaN was involved).
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // Unordered (NaN) counts as not-equal: branch on parity too.
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Unordered must NOT take the equal branch: skip over it on parity.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
// Slow-path secondary-supers scan via REPNE SCASD; result register (EDI)
// is zeroed on a hit and left non-zero on a miss.  Flags are set as well.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant used when only the flags result of the check is consumed
// (compared against null); skips the XOR of EDI, so slightly cheaper.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short-offset, carry-flag variant of the loop-end branch.
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short-offset, carry-flag variant of the unsigned conditional jump.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short-offset float-compare jump; parity (unordered/NaN) handling mirrors
// the long-form jmpConUCF2 above.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
12682 // This is the test to avoid. 12683 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{ 12684 match(Set dst (CmpL3 src1 src2)); 12685 effect( KILL flags ); 12686 ins_cost(1000); 12687 format %{ "XOR $dst,$dst\n\t" 12688 "CMP $src1.hi,$src2.hi\n\t" 12689 "JLT,s m_one\n\t" 12690 "JGT,s p_one\n\t" 12691 "CMP $src1.lo,$src2.lo\n\t" 12692 "JB,s m_one\n\t" 12693 "JEQ,s done\n" 12694 "p_one:\tINC $dst\n\t" 12695 "JMP,s done\n" 12696 "m_one:\tDEC $dst\n" 12697 "done:" %} 12698 ins_encode %{ 12699 Label p_one, m_one, done; 12700 __ xorptr($dst$$Register, $dst$$Register); 12701 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register)); 12702 __ jccb(Assembler::less, m_one); 12703 __ jccb(Assembler::greater, p_one); 12704 __ cmpl($src1$$Register, $src2$$Register); 12705 __ jccb(Assembler::below, m_one); 12706 __ jccb(Assembler::equal, done); 12707 __ bind(p_one); 12708 __ incrementl($dst$$Register); 12709 __ jmpb(done); 12710 __ bind(m_one); 12711 __ decrementl($dst$$Register); 12712 __ bind(done); 12713 %} 12714 ins_pipe( pipe_slow ); 12715 %} 12716 12717 //====== 12718 // Manifest a CmpL result in the normal flags. Only good for LT or GE 12719 // compares. Can be used for LE or GT compares by reversing arguments. 12720 // NOT GOOD FOR EQ/NE tests. 12721 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ 12722 match( Set flags (CmpL src zero )); 12723 ins_cost(100); 12724 format %{ "TEST $src.hi,$src.hi" %} 12725 opcode(0x85); 12726 ins_encode( OpcP, RegReg_Hi2( src, src ) ); 12727 ins_pipe( ialu_cr_reg_reg ); 12728 %} 12729 12730 // Manifest a CmpL result in the normal flags. Only good for LT or GE 12731 // compares. Can be used for LE or GT compares by reversing arguments. 12732 // NOT GOOD FOR EQ/NE tests. 
// CMP/SBB sequence: subtracts the 64-bit values without keeping the result,
// leaving the sign/overflow flags needed for LT/GE in EFLAGS.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
// Only matches when the If's BoolTest is lt or ge.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Unsigned counterpart of cmpL_reg_flags_LTGE; shares the CMP/SBB encoding.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// A 64-bit conditional move is two 32-bit CMOVcc ops (lo then hi).
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Integer CMOVE driven by flags from an LTGE-style long compare.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above, but the source operand comes from memory.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Pointer CMOVE driven by flags from an LTGE-style long compare.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the UseSSE guard is parenthesized over both BoolTest checks.
// Previously '&&' bound tighter than '||', so the rule matched on
// BoolTest::ge regardless of the UseSSE level.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// A long is zero iff (lo | hi) == 0, so OR the halves into a temp.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compares low words; only falls through to the high-word compare on equality.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// A long is zero iff (lo | hi) == 0; signedness is irrelevant for EQ/NE.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Compares low words; only falls through to the high-word compare on equality.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// 64-bit conditional move on EQ/NE flags: two 32-bit CMOVcc ops (lo then hi).
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above, but the source operand comes from memory.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Integer CMOVE driven by flags from an EQ/NE-style long compare.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above, but the source operand comes from memory.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Pointer CMOVE driven by flags from an EQ/NE-style long compare.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the UseSSE guard is parenthesized over both BoolTest checks.
// Previously '&&' bound tighter than '||', so the rule matched on
// BoolTest::ne regardless of the UseSSE level.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
// 0 - src (via CMP/SBB against a zeroed temp) yields flags for the negated value.
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
// Only matches when the If's BoolTest is gt or le (cmpOp_commute emits
// the commuted condition, since the flag-setting rules swapped operands).
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above, but the source operand comes from memory.
// NOTE(review): the format prints "$src.hi+4" where the other cmovLL_mem
// variants print "$src.hi" — debug-format text only; codegen is identical.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Integer CMOVE driven by flags from a LEGT-style (commuted) long compare.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above, but the source operand comes from memory.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Pointer CMOVE driven by flags from a LEGT-style (commuted) long compare.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the UseSSE guard is parenthesized over both BoolTest checks.
// Previously '&&' bound tighter than '||', so the rule matched on
// BoolTest::gt regardless of the UseSSE level.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
// Dynamic call: inline-cache dispatch; EAX is preloaded with a
// placeholder oop patched by the IC machinery (see format text).
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that neither uses nor preserves the x87 float stack.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast lock when RTM (Restricted Transactional Memory) is enabled.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Fast lock for the non-RTM case; passes noreg/NULL for the RTM-only args.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    // post_pc is captured for symmetry with pre_pc; only pre_pc is checked below.
    address post_pc = __ pc();
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Fold a reload of a just-stored value into a reuse of the stored register.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.