//
// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
// For the integer registers this is the standard IA-32 register number
// (EAX=0, ECX=1, EDX=2, EBX=3, ESP=4, EBP=5, ESI=6, EDI=7).

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
// (encodings 8..15 pad the FP chunk out to a power-of-two boundary).
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

// Shorthand used by the emission helpers below: route assembler calls
// through a local MacroAssembler named "_masm".
#define __ _masm.

// How to find the high register of a Long pair, given the low register
// (NOTE(review): +2 reflects the OptoReg numbering of pair halves in this
// port — confirm against the chunk0 alloc_class ordering before changing).
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Rounds 'adr' down to a 16-byte boundary inside the caller-supplied pool
// and stores the two 64-bit halves there; returns the aligned address.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call for FPU-mode and
// AVX state resets; ret_addr_offset/compute_padding must account for them.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All sequence; filled in at emission time,
// -1 until then (asserted below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit one ModR/M (or SIB) byte: f1 = mod (2 bits), f2 = reg/opcode
// extension (3 bits), f3 = r/m (3 bits).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a condition code into a base opcode.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
// 'offset' shifts the relocation point relative to the current insts_mark.
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  // Debug-only sanity: an oop-typed relocation must carry a real oop
  // (0 and the non-oop sentinel word are the only allowed exceptions).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + [ESP+disp] addressing, choosing the 8-bit displacement
// form when disp fits in a signed byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// General ModR/M + optional SIB + displacement emitter.
// index == 0x4 is the IA-32 "no index" encoding; base == -1 flags an
// absolute (disp32-only) address.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Register-to-register MOV (opcode 0x8B); emits nothing for a self-move.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Post-compare fixup so NaN operands report 'less than' (see flag table below).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst':
// -1 for unordered/below, 0 for equal, 1 for above (from the code below:
// dst starts at -1, parity/below keep it, setb(notEqual) yields 0 or 1).
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
// Pretty-print the method prolog for -XX:+PrintOptoAssembly; must mirror
// the byte sequence produced by MachPrologNode::emit below.
#ifndef PRODUCT
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


// Emit the method prolog; the heavy lifting (bang, frame setup, FPU mode)
// lives in MacroAssembler::verified_entry.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
// Pretty-print the method epilog; must mirror MachEpilogNode::emit below.
#ifndef PRODUCT
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

// Emit the method epilog: optional vzeroupper / FPU-mode restore, frame
// teardown (ADD ESP), POP EBP, optional reserved-stack check, and the
// return-polling TEST against the thread's polling page.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize (imm32 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize (sign-extended imm8 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // EBX is dead here (caller-saved, past the frame teardown), so use it
    // to hold the thread and then the polling-page address.
    Register pollReg = as_Register(EBX_enc);
    MacroAssembler masm(&cbuf);
    masm.get_thread(pollReg);
    masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
    masm.relocate(relocInfo::poll_return_type);
    masm.testl(rax, Address(pollReg, 0));
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//=============================================================================

// Register class of an OptoReg, used by the spill-copy helpers below to
// pick the right move sequence.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (or format, or size) a GPR/FPU load/store at [ESP + offset].
// With cbuf == NULL and !do_size this only prints; the return value is
// always the accumulated encoding size in bytes.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else {                   // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers.  Extra opcode bits, limited syntax.
// reg_lo+1 == reg_hi signals a 64-bit (double) spill; otherwise 32-bit.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
    //                          it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting: with EVEX (UseAVX > 2) the displacement may be
  // compressed to a single byte even when it exceeds 127.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM spill copy (movdbl for a pair, movflt otherwise); returns
// the accumulated encoding size.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// GPR -> XMM spill copy via MOVD; 32-bit only. Return value is the full
// encoding size of the move (not accumulated onto 'size').
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// XMM -> GPR spill copy via MOVD; mirror of impl_movgpr2x_helper.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// GPR -> GPR spill copy (MOV, opcode 0x8B); 2 bytes added to 'size'.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// x87 store helper: brings src to the FP stack top with FLD if needed
// before the store.  (Function continues beyond this chunk of the file.)
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/; 926 const char *op_str; 927 int op; 928 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store? 929 op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D "; 930 op = 0xDD; 931 } else { // 32-bit store 932 op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S "; 933 op = 0xD9; 934 assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" ); 935 } 936 937 return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st); 938 } 939 940 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. 941 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 942 int src_hi, int dst_hi, uint ireg, outputStream* st); 943 944 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 945 int stack_offset, int reg, uint ireg, outputStream* st); 946 947 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 948 int dst_offset, uint ireg, outputStream* st) { 949 int calc_size = 0; 950 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 951 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 952 switch (ireg) { 953 case Op_VecS: 954 calc_size = 3+src_offset_size + 3+dst_offset_size; 955 break; 956 case Op_VecD: { 957 calc_size = 3+src_offset_size + 3+dst_offset_size; 958 int tmp_src_offset = src_offset + 4; 959 int tmp_dst_offset = dst_offset + 4; 960 src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4); 961 dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 
1 : 4); 962 calc_size += 3+src_offset_size + 3+dst_offset_size; 963 break; 964 } 965 case Op_VecX: 966 case Op_VecY: 967 case Op_VecZ: 968 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 969 break; 970 default: 971 ShouldNotReachHere(); 972 } 973 if (cbuf) { 974 MacroAssembler _masm(cbuf); 975 int offset = __ offset(); 976 switch (ireg) { 977 case Op_VecS: 978 __ pushl(Address(rsp, src_offset)); 979 __ popl (Address(rsp, dst_offset)); 980 break; 981 case Op_VecD: 982 __ pushl(Address(rsp, src_offset)); 983 __ popl (Address(rsp, dst_offset)); 984 __ pushl(Address(rsp, src_offset+4)); 985 __ popl (Address(rsp, dst_offset+4)); 986 break; 987 case Op_VecX: 988 __ movdqu(Address(rsp, -16), xmm0); 989 __ movdqu(xmm0, Address(rsp, src_offset)); 990 __ movdqu(Address(rsp, dst_offset), xmm0); 991 __ movdqu(xmm0, Address(rsp, -16)); 992 break; 993 case Op_VecY: 994 __ vmovdqu(Address(rsp, -32), xmm0); 995 __ vmovdqu(xmm0, Address(rsp, src_offset)); 996 __ vmovdqu(Address(rsp, dst_offset), xmm0); 997 __ vmovdqu(xmm0, Address(rsp, -32)); 998 break; 999 case Op_VecZ: 1000 __ evmovdquq(Address(rsp, -64), xmm0, 2); 1001 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 1002 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 1003 __ evmovdquq(xmm0, Address(rsp, -64), 2); 1004 break; 1005 default: 1006 ShouldNotReachHere(); 1007 } 1008 int size = __ offset() - offset; 1009 assert(size == calc_size, "incorrect size calculation"); 1010 return size; 1011 #ifndef PRODUCT 1012 } else if (!do_size) { 1013 switch (ireg) { 1014 case Op_VecS: 1015 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1016 "popl [rsp + #%d]", 1017 src_offset, dst_offset); 1018 break; 1019 case Op_VecD: 1020 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1021 "popq [rsp + #%d]\n\t" 1022 "pushl [rsp + #%d]\n\t" 1023 "popq [rsp + #%d]", 1024 src_offset, dst_offset, src_offset+4, dst_offset+4); 1025 break; 1026 case Op_VecX: 1027 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem 
spill\n\t" 1028 "movdqu xmm0, [rsp + #%d]\n\t" 1029 "movdqu [rsp + #%d], xmm0\n\t" 1030 "movdqu xmm0, [rsp - #16]", 1031 src_offset, dst_offset); 1032 break; 1033 case Op_VecY: 1034 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1035 "vmovdqu xmm0, [rsp + #%d]\n\t" 1036 "vmovdqu [rsp + #%d], xmm0\n\t" 1037 "vmovdqu xmm0, [rsp - #32]", 1038 src_offset, dst_offset); 1039 break; 1040 case Op_VecZ: 1041 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1042 "vmovdqu xmm0, [rsp + #%d]\n\t" 1043 "vmovdqu [rsp + #%d], xmm0\n\t" 1044 "vmovdqu xmm0, [rsp - #64]", 1045 src_offset, dst_offset); 1046 break; 1047 default: 1048 ShouldNotReachHere(); 1049 } 1050 #endif 1051 } 1052 return calc_size; 1053 } 1054 1055 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1056 // Get registers to move 1057 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1058 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1059 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1060 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1061 1062 enum RC src_second_rc = rc_class(src_second); 1063 enum RC src_first_rc = rc_class(src_first); 1064 enum RC dst_second_rc = rc_class(dst_second); 1065 enum RC dst_first_rc = rc_class(dst_first); 1066 1067 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1068 1069 // Generate spill code! 
1070 int size = 0; 1071 1072 if( src_first == dst_first && src_second == dst_second ) 1073 return size; // Self copy, no move 1074 1075 if (bottom_type()->isa_vect() != NULL) { 1076 uint ireg = ideal_reg(); 1077 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1078 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1079 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1080 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1081 // mem -> mem 1082 int src_offset = ra_->reg2offset(src_first); 1083 int dst_offset = ra_->reg2offset(dst_first); 1084 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1085 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1086 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1087 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1088 int stack_offset = ra_->reg2offset(dst_first); 1089 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1090 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1091 int stack_offset = ra_->reg2offset(src_first); 1092 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1093 } else { 1094 ShouldNotReachHere(); 1095 } 1096 } 1097 1098 // -------------------------------------- 1099 // Check for mem-mem move. push/pop to move. 
1100 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1101 if( src_second == dst_first ) { // overlapping stack copy ranges 1102 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1103 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1104 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1105 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1106 } 1107 // move low bits 1108 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1109 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1110 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1111 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1112 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1113 } 1114 return size; 1115 } 1116 1117 // -------------------------------------- 1118 // Check for integer reg-reg copy 1119 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1120 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1121 1122 // Check for integer store 1123 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1124 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1125 1126 // Check for integer load 1127 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1128 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1129 1130 // Check for integer reg-xmm reg copy 1131 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1132 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1133 "no 64 bit integer-float reg moves" ); 1134 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1135 } 1136 // -------------------------------------- 1137 // Check for float reg-reg copy 1138 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1139 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1140 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1141 if( cbuf ) { 1142 1143 // Note the mucking with the register encode to compensate for the 0/1 1144 // indexing issue mentioned in a comment in the reg_def sections 1145 // for FPR registers many lines above here. 1146 1147 if( src_first != FPR1L_num ) { 1148 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1149 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1150 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1151 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1152 } else { 1153 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1154 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1155 } 1156 #ifndef PRODUCT 1157 } else if( !do_size ) { 1158 if( size != 0 ) st->print("\n\t"); 1159 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1160 else st->print( "FST %s", Matcher::regName[dst_first]); 1161 #endif 1162 } 1163 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1164 } 1165 1166 // Check for float store 1167 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1168 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1169 } 1170 1171 // Check for float load 1172 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1173 int offset = ra_->reg2offset(src_first); 1174 const char *op_str; 1175 int op; 1176 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1177 op_str = "FLD_D"; 1178 op = 0xDD; 1179 } else { // 32-bit load 1180 op_str = "FLD_S"; 1181 op = 0xD9; 1182 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1183 } 1184 if( cbuf ) { 1185 emit_opcode (*cbuf, op ); 1186 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1187 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1188 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1189 #ifndef PRODUCT 1190 } else if( !do_size ) { 1191 if( size != 0 ) st->print("\n\t"); 1192 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1193 #endif 1194 } 1195 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1196 return size + 3+offset_size+2; 1197 } 1198 1199 // Check for xmm reg-reg copy 1200 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1201 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1202 (src_first+1 == src_second && dst_first+1 == dst_second), 1203 "no non-adjacent float-moves" ); 1204 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1205 } 1206 1207 // Check for xmm reg-integer reg copy 1208 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1209 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1210 "no 64 bit float-integer reg moves" ); 1211 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1212 } 1213 1214 // Check for xmm store 1215 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1216 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1217 } 1218 1219 // Check for float xmm load 1220 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1221 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1222 } 1223 1224 // Copy from float reg to xmm reg 1225 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1226 
// copy to the top of stack from floating point reg 1227 // and use LEA to preserve flags 1228 if( cbuf ) { 1229 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1230 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1231 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1232 emit_d8(*cbuf,0xF8); 1233 #ifndef PRODUCT 1234 } else if( !do_size ) { 1235 if( size != 0 ) st->print("\n\t"); 1236 st->print("LEA ESP,[ESP-8]"); 1237 #endif 1238 } 1239 size += 4; 1240 1241 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1242 1243 // Copy from the temp memory to the xmm reg. 1244 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1245 1246 if( cbuf ) { 1247 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1248 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1249 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1250 emit_d8(*cbuf,0x08); 1251 #ifndef PRODUCT 1252 } else if( !do_size ) { 1253 if( size != 0 ) st->print("\n\t"); 1254 st->print("LEA ESP,[ESP+8]"); 1255 #endif 1256 } 1257 size += 4; 1258 return size; 1259 } 1260 1261 assert( size > 0, "missed a case" ); 1262 1263 // -------------------------------------------------------------------- 1264 // Check for second bits still needing moving. 
1265 if( src_second == dst_second ) 1266 return size; // Self copy; no move 1267 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1268 1269 // Check for second word int-int move 1270 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1271 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1272 1273 // Check for second word integer store 1274 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1275 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1276 1277 // Check for second word integer load 1278 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1279 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1280 1281 1282 Unimplemented(); 1283 return 0; // Mute compiler 1284 } 1285 1286 #ifndef PRODUCT 1287 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1288 implementation( NULL, ra_, false, st ); 1289 } 1290 #endif 1291 1292 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1293 implementation( &cbuf, ra_, false, NULL ); 1294 } 1295 1296 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1297 return MachNode::size(ra_); 1298 } 1299 1300 1301 //============================================================================= 1302 #ifndef PRODUCT 1303 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1304 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1305 int reg = ra_->get_reg_first(this); 1306 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1307 } 1308 #endif 1309 1310 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1311 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1312 int reg = ra_->get_encode(this); 1313 if( offset >= 128 ) { 1314 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1315 emit_rm(cbuf, 0x2, reg, 0x04); 1316 
emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1317 emit_d32(cbuf, offset); 1318 } 1319 else { 1320 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1321 emit_rm(cbuf, 0x1, reg, 0x04); 1322 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1323 emit_d8(cbuf, offset); 1324 } 1325 } 1326 1327 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1328 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1329 if( offset >= 128 ) { 1330 return 7; 1331 } 1332 else { 1333 return 4; 1334 } 1335 } 1336 1337 //============================================================================= 1338 #ifndef PRODUCT 1339 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1340 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1341 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1342 st->print_cr("\tNOP"); 1343 st->print_cr("\tNOP"); 1344 if( !OptoBreakpoint ) 1345 st->print_cr("\tNOP"); 1346 } 1347 #endif 1348 1349 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1350 MacroAssembler masm(&cbuf); 1351 #ifdef ASSERT 1352 uint insts_size = cbuf.insts_size(); 1353 #endif 1354 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1355 masm.jump_cc(Assembler::notEqual, 1356 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1357 /* WARNING these NOPs are critical so that verified entry point is properly 1358 aligned for patching by NativeJump::patch_verified_entry() */ 1359 int nops_cnt = 2; 1360 if( !OptoBreakpoint ) // Leave space for int3 1361 nops_cnt += 1; 1362 masm.nop(nops_cnt); 1363 1364 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1365 } 1366 1367 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1368 return OptoBreakpoint ? 
11 : 12; 1369 } 1370 1371 1372 //============================================================================= 1373 1374 int Matcher::regnum_to_fpu_offset(int regnum) { 1375 return regnum - 32; // The FP registers are in the second chunk 1376 } 1377 1378 // This is UltraSparc specific, true just means we have fast l2f conversion 1379 const bool Matcher::convL2FSupported(void) { 1380 return true; 1381 } 1382 1383 // Is this branch offset short enough that a short branch can be used? 1384 // 1385 // NOTE: If the platform does not provide any short branch variants, then 1386 // this method should return false for offset 0. 1387 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1388 // The passed offset is relative to address of the branch. 1389 // On 86 a branch displacement is calculated relative to address 1390 // of a next instruction. 1391 offset -= br_size; 1392 1393 // the short version of jmpConUCF2 contains multiple branches, 1394 // making the reach slightly less 1395 if (rule == jmpConUCF2_rule) 1396 return (-126 <= offset && offset <= 125); 1397 return (-128 <= offset && offset <= 127); 1398 } 1399 1400 const bool Matcher::isSimpleConstant64(jlong value) { 1401 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1402 return false; 1403 } 1404 1405 // The ecx parameter to rep stos for the ClearArray node is in dwords. 1406 const bool Matcher::init_array_count_is_in_bytes = false; 1407 1408 // Needs 2 CMOV's for longs. 1409 const int Matcher::long_cmove_cost() { return 1; } 1410 1411 // No CMOVF/CMOVD with SSE/SSE2 1412 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1413 1414 // Does the CPU require late expand (see block.cpp for description of late expand)? 1415 const bool Matcher::require_postalloc_expand = false; 1416 1417 // Do we need to mask the count passed to shift instructions or does 1418 // the cpu only look at the lower 5/6 bits anyway? 
1419 const bool Matcher::need_masked_shift_count = false; 1420 1421 bool Matcher::narrow_oop_use_complex_address() { 1422 ShouldNotCallThis(); 1423 return true; 1424 } 1425 1426 bool Matcher::narrow_klass_use_complex_address() { 1427 ShouldNotCallThis(); 1428 return true; 1429 } 1430 1431 bool Matcher::const_oop_prefer_decode() { 1432 ShouldNotCallThis(); 1433 return true; 1434 } 1435 1436 bool Matcher::const_klass_prefer_decode() { 1437 ShouldNotCallThis(); 1438 return true; 1439 } 1440 1441 // Is it better to copy float constants, or load them directly from memory? 1442 // Intel can load a float constant from a direct address, requiring no 1443 // extra registers. Most RISCs will have to materialize an address into a 1444 // register first, so they would do better to copy the constant from stack. 1445 const bool Matcher::rematerialize_float_constants = true; 1446 1447 // If CPU can load and store mis-aligned doubles directly then no fixup is 1448 // needed. Else we split the double into 2 integer pieces and move it 1449 // piece-by-piece. Only happens when passing doubles into C code as the 1450 // Java calling convention forces doubles to be aligned. 
1451 const bool Matcher::misaligned_doubles_ok = true; 1452 1453 1454 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { 1455 // Get the memory operand from the node 1456 uint numopnds = node->num_opnds(); // Virtual call for number of operands 1457 uint skipped = node->oper_input_base(); // Sum of leaves skipped so far 1458 assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" ); 1459 uint opcnt = 1; // First operand 1460 uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand 1461 while( idx >= skipped+num_edges ) { 1462 skipped += num_edges; 1463 opcnt++; // Bump operand count 1464 assert( opcnt < numopnds, "Accessing non-existent operand" ); 1465 num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand 1466 } 1467 1468 MachOper *memory = node->_opnds[opcnt]; 1469 MachOper *new_memory = NULL; 1470 switch (memory->opcode()) { 1471 case DIRECT: 1472 case INDOFFSET32X: 1473 // No transformation necessary. 1474 return; 1475 case INDIRECT: 1476 new_memory = new indirect_win95_safeOper( ); 1477 break; 1478 case INDOFFSET8: 1479 new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0)); 1480 break; 1481 case INDOFFSET32: 1482 new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0)); 1483 break; 1484 case INDINDEXOFFSET: 1485 new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0)); 1486 break; 1487 case INDINDEXSCALE: 1488 new_memory = new indIndexScale_win95_safeOper(memory->scale()); 1489 break; 1490 case INDINDEXSCALEOFFSET: 1491 new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0)); 1492 break; 1493 case LOAD_LONG_INDIRECT: 1494 case LOAD_LONG_INDOFFSET32: 1495 // Does not use EBP as address register, use { EDX, EBX, EDI, ESI} 1496 return; 1497 default: 1498 assert(false, "unexpected memory operand in pd_implicit_null_fixup()"); 1499 return; 1500 } 1501 node->_opnds[opcnt] = new_memory; 1502 } 1503 1504 // Advertise 
here if the CPU requires explicit rounding operations to implement strictfp mode. 1505 const bool Matcher::strict_fp_requires_explicit_rounding = true; 1506 1507 // Are floats conerted to double when stored to stack during deoptimization? 1508 // On x32 it is stored with convertion only when FPU is used for floats. 1509 bool Matcher::float_in_double() { return (UseSSE == 0); } 1510 1511 // Do ints take an entire long register or just half? 1512 const bool Matcher::int_in_long = false; 1513 1514 // Return whether or not this register is ever used as an argument. This 1515 // function is used on startup to build the trampoline stubs in generateOptoStub. 1516 // Registers not mentioned will be killed by the VM call in the trampoline, and 1517 // arguments in those registers not be available to the callee. 1518 bool Matcher::can_be_java_arg( int reg ) { 1519 if( reg == ECX_num || reg == EDX_num ) return true; 1520 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; 1521 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1522 return false; 1523 } 1524 1525 bool Matcher::is_spillable_arg( int reg ) { 1526 return can_be_java_arg(reg); 1527 } 1528 1529 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { 1530 // Use hardware integer DIV instruction when 1531 // it is faster than a code which use multiply. 1532 // Only when constant divisor fits into 32 bit 1533 // (min_jint is excluded to get only correct 1534 // positive 32 bit values from negative). 
1535 return VM_Version::has_fast_idiv() && 1536 (divisor == (int)divisor && divisor != min_jint); 1537 } 1538 1539 // Register for DIVI projection of divmodI 1540 RegMask Matcher::divI_proj_mask() { 1541 return EAX_REG_mask(); 1542 } 1543 1544 // Register for MODI projection of divmodI 1545 RegMask Matcher::modI_proj_mask() { 1546 return EDX_REG_mask(); 1547 } 1548 1549 // Register for DIVL projection of divmodL 1550 RegMask Matcher::divL_proj_mask() { 1551 ShouldNotReachHere(); 1552 return RegMask(); 1553 } 1554 1555 // Register for MODL projection of divmodL 1556 RegMask Matcher::modL_proj_mask() { 1557 ShouldNotReachHere(); 1558 return RegMask(); 1559 } 1560 1561 const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1562 return NO_REG_mask(); 1563 } 1564 1565 // Returns true if the high 32 bits of the value is known to be zero. 1566 bool is_operand_hi32_zero(Node* n) { 1567 int opc = n->Opcode(); 1568 if (opc == Op_AndL) { 1569 Node* o2 = n->in(2); 1570 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1571 return true; 1572 } 1573 } 1574 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1575 return true; 1576 } 1577 return false; 1578 } 1579 1580 %} 1581 1582 //----------ENCODING BLOCK----------------------------------------------------- 1583 // This block specifies the encoding classes used by the compiler to output 1584 // byte streams. Encoding classes generate functions which are called by 1585 // Machine Instruction Nodes in order to generate the bit encoding of the 1586 // instruction. Operands specify their base encoding interface with the 1587 // interface keyword. There are currently supported four interfaces, 1588 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1589 // operand to generate a function which returns its register number when 1590 // queried. CONST_INTER causes an operand to generate a function which 1591 // returns the value of the constant when queried. 
MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // 0x66 operand-size override prefix: following instruction uses 16-bit operands
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32 ,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                          special case
    //
    // input : rax,: dividend                       min_int
    //         reg:  divisor                        -1
    //
    // output: rax,: quotient  (= rax, idiv reg)    min_int
    //         rdx:  remainder (= rax, irem reg)    0
    //
    // Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFFFFFFFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);  // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);  // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF);  // cmp rcx,0FFFFFFFFh (83 /7 ib sign-extends FF)
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                      // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02); }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low word of a long immediate op: opcode + r/m + 8/32-bit immediate
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // High word of a long immediate op: uses $tertiary and the hi register of the pair
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // BSWAP both halves of a long, then exchange them
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // FCMOV family: two-byte opcode built from 0xDA00 base + condition + src index
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  // Empty the x87 stack (pre-SSE2 only); emitted size must be constant,
  // tracked via sizeof_FFree_Float_Stack_All.
  enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      // C runtime returns float/double on the x87 stack; move it to xmm0
      // (or discard it) so SSE2+ compiled code sees a clean FPU stack.
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}

  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if (ra_->C->in_24_bit_fp_mode()) {
      MacroAssembler _masm(&cbuf);
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    MacroAssembler _masm(&cbuf);
    __ vzeroupper();
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;

    if (!_method) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_IMM32);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     rspec, RELOC_DISP32);
      // Emit stubs for static call.
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);                        // Displacement

  %}

  //   Following encoding is no longer used, but may be restored if calling
  //   convention changes significantly.
  //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //     // int ic_reg     = Matcher::inline_cache_reg();
  //     // int ic_encode  = Matcher::_regEncode[ic_reg];
  //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
  //     // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
  //     // // so we load it immediately before the call
  //     // emit_opcode(cbuf, 0x8B);  // MOV    imo_reg,ic_reg  # method_oop
  //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode );  // R/M byte
  //
  //     // xor rbp,ebp
  //     emit_opcode(cbuf, 0x33);
  //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //     // CALL to interpreter.
  //     cbuf.set_insts_mark();
  //     $$$emit8$primary;
  //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
  //   %}

  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Copy the low register of a long pair into an int register.
  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  // Emit a raw 32-bit constant
  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  enc_class lock_prefix( ) %{
    emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB0);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // 16-bit mode (0x66 operand-size override)
    emit_opcode(cbuf, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize ZF!=0 as a 0/1 boolean in 'res' without disturbing flags:
  // MOV (B8+rd) does not affect flags, so the JNE still sees the cmpxchg result.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();     // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Long shift by 1..31: SHLD/SHRD moves bits across the word boundary,
  // then an ordinary shift finishes the other half.  $tertiary selects
  // direction (0xA4 == SHLD), which decides which half is r1 vs r2.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by 32..63: lo = hi >> (cnt-32), hi = sign
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Shift of a long by 32..63 with zero fill; $secondary (0x5 == SHR)
  // decides which half becomes the result and which is cleared.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // dst = min(dst,src): compare, then branch (JL, 0x7C) around the move
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst,src): compare, then branch (JG, 0x7F) around the move
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst  (F7 /3)
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst  (0F 9C == SETL r/m8, signed less-than)
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free p += (p < q) ? y : 0 using SBB to build an all-ones/zero mask
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Variable long left shift: handle shift>=32 by moving lo into hi first
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Variable long logical right shift: handle shift>=32 by moving hi into lo first
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Variable long arithmetic right shift: for shift>=32, lo gets hi and hi gets sign
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Multiply by the strictfp scaling constant to avoid double-rounding of subnormals
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Inverse scaling of strictfp_bias1
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD  [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD  ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;                      // FST (no pop) if src already is FPR1
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD  ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: pop the copy we just pushed
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD  ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD  ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Spill two XMM doubles through a stack temp and push both onto the x87 stack
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Spill two XMM floats through a stack temp and push both onto the x87 stack
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Move x87 TOS result back into an XMM double, releasing the stack temp
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Move x87 TOS result back into an XMM float, releasing $d8 bytes of stack temp
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Spill an XMM double through a fresh stack temp and push it onto the x87 stack
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Reserve an 8-byte temp slot on the C stack
  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  // Release the 8-byte temp slot
  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Copy an XMM double into the existing stack temp and push it onto the x87 stack
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  // FPREM loop: repeat until C2 (status bit 0x0400 -> parity after SAHF) clears
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp  ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf,
0x66 ); // operand-size prefix for 16-bit immediate 2651 emit_opcode( cbuf, 0xA9 ); 2652 emit_d16 ( cbuf, 0x0400 ); 2653 // // // This sequence works, but stalls for 12-16 cycles on PPro 2654 // // test rax,0x0400 2655 // emit_opcode( cbuf, 0xA9 ); 2656 // emit_d32 ( cbuf, 0x00000400 ); 2657 // 2658 // jz exit (no unordered comparison) 2659 emit_opcode( cbuf, 0x74 ); 2660 emit_d8 ( cbuf, 0x02 ); 2661 // mov ah,1 - treat as LT case (set carry flag) 2662 emit_opcode( cbuf, 0xB4 ); 2663 emit_d8 ( cbuf, 0x01 ); 2664 // sahf 2665 emit_opcode( cbuf, 0x9E); 2666 %} 2667 2668 enc_class cmpF_P6_fixup() %{ 2669 // Fixup the integer flags in case comparison involved a NaN 2670 // 2671 // JNP exit (no unordered comparison, P-flag is set by NaN) 2672 emit_opcode( cbuf, 0x7B ); 2673 emit_d8 ( cbuf, 0x03 ); 2674 // MOV AH,1 - treat as LT case (set carry flag) 2675 emit_opcode( cbuf, 0xB4 ); 2676 emit_d8 ( cbuf, 0x01 ); 2677 // SAHF 2678 emit_opcode( cbuf, 0x9E); 2679 // NOP // target for branch to avoid branch to branch 2680 emit_opcode( cbuf, 0x90); 2681 %} 2682 2683 // fnstsw_ax(); 2684 // sahf(); 2685 // movl(dst, nan_result); 2686 // jcc(Assembler::parity, exit); 2687 // movl(dst, less_result); 2688 // jcc(Assembler::below, exit); 2689 // movl(dst, equal_result); 2690 // jcc(Assembler::equal, exit); 2691 // movl(dst, greater_result); 2692 2693 // less_result = 1; 2694 // greater_result = -1; 2695 // equal_result = 0; 2696 // nan_result = -1; 2697 2698 enc_class CmpF_Result(rRegI dst) %{ 2699 // fnstsw_ax(); 2700 emit_opcode( cbuf, 0xDF); 2701 emit_opcode( cbuf, 0xE0); 2702 // sahf 2703 emit_opcode( cbuf, 0x9E); 2704 // movl(dst, nan_result); 2705 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2706 emit_d32( cbuf, -1 ); 2707 // jcc(Assembler::parity, exit); 2708 emit_opcode( cbuf, 0x7A ); 2709 emit_d8 ( cbuf, 0x13 ); 2710 // movl(dst, less_result); 2711 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2712 emit_d32( cbuf, -1 ); 2713 // jcc(Assembler::below, exit); 2714 emit_opcode( cbuf, 0x72 ); 
2715 emit_d8 ( cbuf, 0x0C ); 2716 // movl(dst, equal_result); 2717 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2718 emit_d32( cbuf, 0 ); 2719 // jcc(Assembler::equal, exit); 2720 emit_opcode( cbuf, 0x74 ); 2721 emit_d8 ( cbuf, 0x05 ); 2722 // movl(dst, greater_result); 2723 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2724 emit_d32( cbuf, 1 ); 2725 %} 2726 2727 2728 // Compare the longs and set flags 2729 // BROKEN! Do Not use as-is 2730 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2731 // CMP $src1.hi,$src2.hi 2732 emit_opcode( cbuf, 0x3B ); 2733 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2734 // JNE,s done 2735 emit_opcode(cbuf,0x75); 2736 emit_d8(cbuf, 2 ); 2737 // CMP $src1.lo,$src2.lo 2738 emit_opcode( cbuf, 0x3B ); 2739 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2740 // done: 2741 %} 2742 2743 enc_class convert_int_long( regL dst, rRegI src ) %{ 2744 // mov $dst.lo,$src 2745 int dst_encoding = $dst$$reg; 2746 int src_encoding = $src$$reg; 2747 encode_Copy( cbuf, dst_encoding , src_encoding ); 2748 // mov $dst.hi,$src 2749 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2750 // sar $dst.hi,31 2751 emit_opcode( cbuf, 0xC1 ); 2752 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2753 emit_d8(cbuf, 0x1F ); 2754 %} 2755 2756 enc_class convert_long_double( eRegL src ) %{ 2757 // push $src.hi 2758 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2759 // push $src.lo 2760 emit_opcode(cbuf, 0x50+$src$$reg ); 2761 // fild 64-bits at [SP] 2762 emit_opcode(cbuf,0xdf); 2763 emit_d8(cbuf, 0x6C); 2764 emit_d8(cbuf, 0x24); 2765 emit_d8(cbuf, 0x00); 2766 // pop stack 2767 emit_opcode(cbuf, 0x83); // add SP, #8 2768 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2769 emit_d8(cbuf, 0x8); 2770 %} 2771 2772 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2773 // IMUL EDX:EAX,$src1 2774 emit_opcode( cbuf, 0xF7 ); 2775 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2776 // SAR 
EDX,$cnt-32 2777 int shift_count = ((int)$cnt$$constant) - 32; 2778 if (shift_count > 0) { 2779 emit_opcode(cbuf, 0xC1); 2780 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2781 emit_d8(cbuf, shift_count); 2782 } 2783 %} 2784 2785 // this version doesn't have add sp, 8 2786 enc_class convert_long_double2( eRegL src ) %{ 2787 // push $src.hi 2788 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2789 // push $src.lo 2790 emit_opcode(cbuf, 0x50+$src$$reg ); 2791 // fild 64-bits at [SP] 2792 emit_opcode(cbuf,0xdf); 2793 emit_d8(cbuf, 0x6C); 2794 emit_d8(cbuf, 0x24); 2795 emit_d8(cbuf, 0x00); 2796 %} 2797 2798 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2799 // Basic idea: long = (long)int * (long)int 2800 // IMUL EDX:EAX, src 2801 emit_opcode( cbuf, 0xF7 ); 2802 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2803 %} 2804 2805 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2806 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2807 // MUL EDX:EAX, src 2808 emit_opcode( cbuf, 0xF7 ); 2809 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2810 %} 2811 2812 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2813 // Basic idea: lo(result) = lo(x_lo * y_lo) 2814 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2815 // MOV $tmp,$src.lo 2816 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2817 // IMUL $tmp,EDX 2818 emit_opcode( cbuf, 0x0F ); 2819 emit_opcode( cbuf, 0xAF ); 2820 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2821 // MOV EDX,$src.hi 2822 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2823 // IMUL EDX,EAX 2824 emit_opcode( cbuf, 0x0F ); 2825 emit_opcode( cbuf, 0xAF ); 2826 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2827 // ADD $tmp,EDX 2828 emit_opcode( cbuf, 0x03 ); 2829 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2830 // MUL EDX:EAX,$src.lo 2831 emit_opcode( cbuf, 0xF7 ); 2832 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2833 // ADD EDX,ESI 2834 emit_opcode( 
cbuf, 0x03 ); 2835 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2836 %} 2837 2838 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2839 // Basic idea: lo(result) = lo(src * y_lo) 2840 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2841 // IMUL $tmp,EDX,$src 2842 emit_opcode( cbuf, 0x6B ); 2843 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2844 emit_d8( cbuf, (int)$src$$constant ); 2845 // MOV EDX,$src 2846 emit_opcode(cbuf, 0xB8 + EDX_enc); 2847 emit_d32( cbuf, (int)$src$$constant ); 2848 // MUL EDX:EAX,EDX 2849 emit_opcode( cbuf, 0xF7 ); 2850 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2851 // ADD EDX,ESI 2852 emit_opcode( cbuf, 0x03 ); 2853 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2854 %} 2855 2856 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2857 // PUSH src1.hi 2858 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2859 // PUSH src1.lo 2860 emit_opcode(cbuf, 0x50+$src1$$reg ); 2861 // PUSH src2.hi 2862 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2863 // PUSH src2.lo 2864 emit_opcode(cbuf, 0x50+$src2$$reg ); 2865 // CALL directly to the runtime 2866 cbuf.set_insts_mark(); 2867 emit_opcode(cbuf,0xE8); // Call into runtime 2868 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2869 // Restore stack 2870 emit_opcode(cbuf, 0x83); // add SP, #framesize 2871 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2872 emit_d8(cbuf, 4*4); 2873 %} 2874 2875 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2876 // PUSH src1.hi 2877 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2878 // PUSH src1.lo 2879 emit_opcode(cbuf, 0x50+$src1$$reg ); 2880 // PUSH src2.hi 2881 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2882 // PUSH src2.lo 2883 emit_opcode(cbuf, 0x50+$src2$$reg ); 2884 // CALL directly to the runtime 2885 cbuf.set_insts_mark(); 2886 emit_opcode(cbuf,0xE8); // Call into runtime 2887 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2888 // Restore stack 2889 emit_opcode(cbuf, 0x83); // add SP, #framesize 2890 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2891 emit_d8(cbuf, 4*4); 2892 %} 2893 2894 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2895 // MOV $tmp,$src.lo 2896 emit_opcode(cbuf, 0x8B); 2897 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2898 // OR $tmp,$src.hi 2899 emit_opcode(cbuf, 0x0B); 2900 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2901 %} 2902 2903 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2904 // CMP $src1.lo,$src2.lo 2905 emit_opcode( cbuf, 0x3B ); 2906 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2907 // JNE,s skip 2908 emit_cc(cbuf, 0x70, 0x5); 2909 emit_d8(cbuf,2); 2910 // CMP $src1.hi,$src2.hi 2911 emit_opcode( cbuf, 0x3B ); 2912 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2913 %} 2914 2915 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2916 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2917 emit_opcode( cbuf, 0x3B ); 2918 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2919 // MOV $tmp,$src1.hi 2920 emit_opcode( cbuf, 0x8B ); 2921 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2922 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2923 emit_opcode( cbuf, 0x1B ); 2924 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2925 %} 2926 2927 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2928 // XOR $tmp,$tmp 2929 emit_opcode(cbuf,0x33); // XOR 2930 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2931 // CMP $tmp,$src.lo 2932 emit_opcode( cbuf, 0x3B ); 2933 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2934 // SBB $tmp,$src.hi 2935 emit_opcode( cbuf, 0x1B ); 2936 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2937 %} 2938 2939 // Sniff, sniff... 
// ... smells like Gnu Superoptimizer
  // Negate a 64-bit value in place: NEG hi; NEG lo; SBB hi,0 fixes up
  // the borrow out of the low half.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3, $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  // POP EDX (single-byte opcode 0x5A).
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}

  // Unconditional jump to the shared rethrow stub.
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    // 0x80000000 is the x87 "invalid" integer result; take the slow call
    // only when the fast conversion produced it.
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Double-to-long version of DPR2I_encoding: FISTP qword, then test the
  // EDX:EAX result for the invalid pattern 0x80000000:00000000 before
  // falling back to the d2l wrapper.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x5A);       // POP EDX
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);     // Size of slow_call
    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADDP  ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  // Emit FSUB ST,src1 followed by FDIV ST,src2.
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  // FADD ST,src1 then FMUL ST,src2 (non-popping forms).
  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Like MulFAddF but the multiply uses the popping FMULP src2,ST form.
  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  // FILD qword [mem] (0xDF /5), then FISTP (0xDF /7) into the stack slot:
  // the 64-bit x87 transfer provides the atomicity.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             | (to get allocators register number
//  G  Owned by |        |     v    add OptoReg::stack0())
//  r   CALLER  |        |
//  o     |     +--------+      pad to even-align allocators stack-slot
//  w     V     |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^     |   in   |  5
//        |     |  args  |  4   Holes in incoming args owned by SELF
//        |     |        |  3
//        |     |        |
//        |     +--------+
//        V     | old out|      Empty on Intel, window on Sparc
//              | old    |preserve|      Must be even aligned.
//           old SP-+--------+----> Matcher::_old_SP, even aligned
//              |   in   |  3   area for Intel ret address
//     Owned by |preserve|      Empty on Sparc.
//       SELF  +--------+
//        |    |  pad2  |  2   pad to align old SP
//        |    +--------+  1
//        |    | locks  |  0
//        |    +--------+----> OptoReg::stack0(), even aligned
//        |    |  pad1  | 11   pad to align new SP
//        |    +--------+
//        |    |        | 10
//        |    | spills |  9   spills
//        V    |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^    |  out   |  7
//        |    |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by  +--------+
//    CALLEE   | new out|  6   Empty on Intel, window on Sparc
//             | new    |preserve|      Must be even-aligned.
//          new SP-+--------+----> Matcher::_new_SP, even aligned
//             |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be nessecary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be nessecary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  // Note: unlike return_value below, floats from C calls stay on the x87
  // stack unless UseSSE>=2 (C ABI returns in ST(0)).
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.
// If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Signed 8-bit integer immediate (fits an imm8 encoding)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned 8-bit integer immediate
operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 16-bit integer immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in [1,31]
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in [32,63] (long shifts done in the high word)
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value fits in a signed 32-bit immediate
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 / pre-SSE2 form)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 form)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 / no-SSE form)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE form)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand
eCXRegI(xRegI reg) %{ 3737 constraint(ALLOC_IN_RC(ecx_reg)); 3738 match(reg); 3739 match(rRegI); 3740 3741 format %{ "ECX" %} 3742 interface(REG_INTER); 3743 %} 3744 3745 operand eDXRegI(xRegI reg) %{ 3746 constraint(ALLOC_IN_RC(edx_reg)); 3747 match(reg); 3748 match(rRegI); 3749 3750 format %{ "EDX" %} 3751 interface(REG_INTER); 3752 %} 3753 3754 operand eDIRegI(xRegI reg) %{ 3755 constraint(ALLOC_IN_RC(edi_reg)); 3756 match(reg); 3757 match(rRegI); 3758 3759 format %{ "EDI" %} 3760 interface(REG_INTER); 3761 %} 3762 3763 operand naxRegI() %{ 3764 constraint(ALLOC_IN_RC(nax_reg)); 3765 match(RegI); 3766 match(eCXRegI); 3767 match(eDXRegI); 3768 match(eSIRegI); 3769 match(eDIRegI); 3770 3771 format %{ %} 3772 interface(REG_INTER); 3773 %} 3774 3775 operand nadxRegI() %{ 3776 constraint(ALLOC_IN_RC(nadx_reg)); 3777 match(RegI); 3778 match(eBXRegI); 3779 match(eCXRegI); 3780 match(eSIRegI); 3781 match(eDIRegI); 3782 3783 format %{ %} 3784 interface(REG_INTER); 3785 %} 3786 3787 operand ncxRegI() %{ 3788 constraint(ALLOC_IN_RC(ncx_reg)); 3789 match(RegI); 3790 match(eAXRegI); 3791 match(eDXRegI); 3792 match(eSIRegI); 3793 match(eDIRegI); 3794 3795 format %{ %} 3796 interface(REG_INTER); 3797 %} 3798 3799 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3800 // // 3801 operand eSIRegI(xRegI reg) %{ 3802 constraint(ALLOC_IN_RC(esi_reg)); 3803 match(reg); 3804 match(rRegI); 3805 3806 format %{ "ESI" %} 3807 interface(REG_INTER); 3808 %} 3809 3810 // Pointer Register 3811 operand anyRegP() %{ 3812 constraint(ALLOC_IN_RC(any_reg)); 3813 match(RegP); 3814 match(eAXRegP); 3815 match(eBXRegP); 3816 match(eCXRegP); 3817 match(eDIRegP); 3818 match(eRegP); 3819 3820 format %{ %} 3821 interface(REG_INTER); 3822 %} 3823 3824 operand eRegP() %{ 3825 constraint(ALLOC_IN_RC(int_reg)); 3826 match(RegP); 3827 match(eAXRegP); 3828 match(eBXRegP); 3829 match(eCXRegP); 3830 match(eDIRegP); 3831 3832 format %{ %} 3833 interface(REG_INTER); 3834 %} 3835 3836 
operand rRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand rFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// NOTE(review): predicate(false) means the matcher never selects this
// operand on its own; presumably only rules that name it explicitly use
// it — confirm against the instruct definitions later in the file.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

operand legRegF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand legRegD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
// (comment previously duplicated the "Plus Offset" text from indIndexOffset)
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o");     // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable-sized
                                     // (comment previously said "Fixed size",
                                     // contradicting the attribute)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
  single_instruction;
  dst : S4(write);
  dst : S3(read);
  DECODE : S0;  // any decoder
  ALU : S3;     // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
  instruction_count(2);
  dst : S4(write);
  dst : S3(read);
  DECODE : S0(2);  // any 2 decoders
  ALU : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
  single_instruction;
  dst : S4(write);
  dst : S3(read);
  D0 : S0;   // big decoder only
  ALU : S3;  // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
  instruction_count(2);
  dst : S4(write);
  dst : S3(read);
  D0 : S0(2);   // big decoder only; twice
  ALU : S3(2);  // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  DECODE : S0;  // any decoder
  ALU : S3;     // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
  instruction_count(2);
  dst : S4(write);
  src : S3(read);
  DECODE : S0(2);  // any 2 decoders
  ALU : S3(2);     // both alus
%}

// Integer ALU reg-reg operation
// NOTE(review): src is declared as a memory operand despite the reg-reg
// name/comment — confirm intent.
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  D0 : S0;   // big decoder only
  ALU : S3;  // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
  instruction_count(2);
  dst : S4(write);
  src : S3(read);
  D0 : S0(2);   // big decoder only; twice
  ALU : S3(2);  // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
  single_instruction;
  dst : S5(write);
  mem : S3(read);
  D0 : S0;   // big decoder only
  ALU : S4;  // any alu
  MEM : S3;  // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
  instruction_count(2);
  dst : S5(write);
  mem : S3(read);
  D0 : S0(2);   // big decoder only; twice
  ALU : S4(2);  // any 2 alus
  MEM : S3(2);  // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
  single_instruction;
  mem : S3(read);
  D0 : S0;   // big decoder only
  MEM : S3;  // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
  single_instruction;
  mem : S3(read);
  src : S5(read);
  D0 : S0;   // big decoder only
  ALU : S4;  // any alu
  MEM : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
  instruction_count(2);
  mem : S3(read);
  src : S5(read);
  D0 : S0(2);   // big decoder only; twice
  ALU : S4(2);  // any 2 alus
  MEM : S3(2);  // Both mems
%}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem) %{
  single_instruction;
  mem : S3(read);
  D0 : S0;   // big decoder only
  ALU : S4;  // any alu
  MEM : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  D0 : S0;    // Big decoder only
  ALU0 : S3;  // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
  single_instruction;
  dst : S5(write);
  mem : S3(read);
  D0 : S0;    // big decoder only
  ALU0 : S4;  // ALU0 only
  MEM : S3;   // any mem
%}

// Integer ALU reg-reg operation
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
  single_instruction;
  cr : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  DECODE : S0;  // any decoder
  ALU : S3;     // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
  single_instruction;
  cr : S4(write);
  src1 : S3(read);
  DECODE : S0;  // any decoder
  ALU : S3;     // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
  single_instruction;
  cr : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  D0 : S0;   // big decoder only
  ALU : S4;  // any alu
  MEM : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
  instruction_count(4);
  y : S4(read);
  q : S3(read);
  p : S3(read);
  DECODE : S0(4);  // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  cr : S3(read);
  DECODE : S0;  // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  cr : S3(read);
  DECODE : S0;  // any decoder
  MEM : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  cr : S3(read);
  DECODE : S0(2);  // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  cr : S3(read);
  DECODE : S0;  // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR
dst) %{ 4938 instruction_count(2); 4939 dst : S3(read); 4940 DECODE : S0(2); // any 2 decoders 4941 FPU : S3; 4942 %} 4943 4944 // Float reg-reg operation 4945 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4946 instruction_count(2); 4947 dst : S4(write); 4948 src : S3(read); 4949 DECODE : S0(2); // any 2 decoders 4950 FPU : S3; 4951 %} 4952 4953 // Float reg-reg operation 4954 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4955 instruction_count(3); 4956 dst : S4(write); 4957 src1 : S3(read); 4958 src2 : S3(read); 4959 DECODE : S0(3); // any 3 decoders 4960 FPU : S3(2); 4961 %} 4962 4963 // Float reg-reg operation 4964 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 4965 instruction_count(4); 4966 dst : S4(write); 4967 src1 : S3(read); 4968 src2 : S3(read); 4969 src3 : S3(read); 4970 DECODE : S0(4); // any 3 decoders 4971 FPU : S3(2); 4972 %} 4973 4974 // Float reg-reg operation 4975 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 4976 instruction_count(4); 4977 dst : S4(write); 4978 src1 : S3(read); 4979 src2 : S3(read); 4980 src3 : S3(read); 4981 DECODE : S1(3); // any 3 decoders 4982 D0 : S0; // Big decoder only 4983 FPU : S3(2); 4984 MEM : S3; 4985 %} 4986 4987 // Float reg-mem operation 4988 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 4989 instruction_count(2); 4990 dst : S5(write); 4991 mem : S3(read); 4992 D0 : S0; // big decoder only 4993 DECODE : S1; // any decoder for FPU POP 4994 FPU : S4; 4995 MEM : S3; // any mem 4996 %} 4997 4998 // Float reg-mem operation 4999 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 5000 instruction_count(3); 5001 dst : S5(write); 5002 src1 : S3(read); 5003 mem : S3(read); 5004 D0 : S0; // big decoder only 5005 DECODE : S1(2); // any decoder for FPU POP 5006 FPU : S4; 5007 MEM : S3; // any mem 5008 %} 5009 5010 // Float mem-reg operation 5011 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 5012 
instruction_count(2); 5013 src : S5(read); 5014 mem : S3(read); 5015 DECODE : S0; // any decoder for FPU PUSH 5016 D0 : S1; // big decoder only 5017 FPU : S4; 5018 MEM : S3; // any mem 5019 %} 5020 5021 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 5022 instruction_count(3); 5023 src1 : S3(read); 5024 src2 : S3(read); 5025 mem : S3(read); 5026 DECODE : S0(2); // any decoder for FPU PUSH 5027 D0 : S1; // big decoder only 5028 FPU : S4; 5029 MEM : S3; // any mem 5030 %} 5031 5032 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 5033 instruction_count(3); 5034 src1 : S3(read); 5035 src2 : S3(read); 5036 mem : S4(read); 5037 DECODE : S0; // any decoder for FPU PUSH 5038 D0 : S0(2); // big decoder only 5039 FPU : S4; 5040 MEM : S3(2); // any mem 5041 %} 5042 5043 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 5044 instruction_count(2); 5045 src1 : S3(read); 5046 dst : S4(read); 5047 D0 : S0(2); // big decoder only 5048 MEM : S3(2); // any mem 5049 %} 5050 5051 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 5052 instruction_count(3); 5053 src1 : S3(read); 5054 src2 : S3(read); 5055 dst : S4(read); 5056 D0 : S0(3); // big decoder only 5057 FPU : S4; 5058 MEM : S3(3); // any mem 5059 %} 5060 5061 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 5062 instruction_count(3); 5063 src1 : S4(read); 5064 mem : S4(read); 5065 DECODE : S0; // any decoder for FPU PUSH 5066 D0 : S0(2); // big decoder only 5067 FPU : S4; 5068 MEM : S3(2); // any mem 5069 %} 5070 5071 // Float load constant 5072 pipe_class fpu_reg_con(regDPR dst) %{ 5073 instruction_count(2); 5074 dst : S5(write); 5075 D0 : S0; // big decoder only for the load 5076 DECODE : S1; // any decoder for FPU POP 5077 FPU : S4; 5078 MEM : S3; // any mem 5079 %} 5080 5081 // Float load constant 5082 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5083 instruction_count(3); 5084 dst : S5(write); 5085 src : S3(read); 5086 D0 : S0; // big decoder only for 
the load 5087 DECODE : S1(2); // any decoder for FPU POP 5088 FPU : S4; 5089 MEM : S3; // any mem 5090 %} 5091 5092 // UnConditional branch 5093 pipe_class pipe_jmp( label labl ) %{ 5094 single_instruction; 5095 BR : S3; 5096 %} 5097 5098 // Conditional branch 5099 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5100 single_instruction; 5101 cr : S1(read); 5102 BR : S3; 5103 %} 5104 5105 // Allocation idiom 5106 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5107 instruction_count(1); force_serialization; 5108 fixed_latency(6); 5109 heap_ptr : S3(read); 5110 DECODE : S0(3); 5111 D0 : S2; 5112 MEM : S3; 5113 ALU : S3(2); 5114 dst : S5(write); 5115 BR : S5; 5116 %} 5117 5118 // Generic big/slow expanded idiom 5119 pipe_class pipe_slow( ) %{ 5120 instruction_count(10); multiple_bundles; force_serialization; 5121 fixed_latency(100); 5122 D0 : S0(2); 5123 MEM : S3(2); 5124 %} 5125 5126 // The real do-nothing guy 5127 pipe_class empty( ) %{ 5128 instruction_count(0); 5129 %} 5130 5131 // Define the class for the Nop node 5132 define %{ 5133 MachNop = empty; 5134 %} 5135 5136 %} 5137 5138 //----------INSTRUCTIONS------------------------------------------------------- 5139 // 5140 // match -- States which machine-independent subtree may be replaced 5141 // by this instruction. 5142 // ins_cost -- The estimated cost of this instruction is used by instruction 5143 // selection to identify a minimum cost tree of machine 5144 // instructions that matches a tree of machine-independent 5145 // instructions. 5146 // format -- A string providing the disassembly for this instruction. 5147 // The value of an instruction's operand may be inserted 5148 // by referring to it with a '$' prefix. 5149 // opcode -- Three instruction opcodes may be provided. These are referred 5150 // to within an encode class as $primary, $secondary, and $tertiary 5151 // respectively. 
The primary opcode is commonly used to 5152 // indicate the type of machine instruction, while secondary 5153 // and tertiary are often used for prefix options or addressing 5154 // modes. 5155 // ins_encode -- A list of encode classes with parameters. The encode class 5156 // name must have been defined in an 'enc_class' specification 5157 // in the encode section of the architecture description. 5158 5159 //----------BSWAP-Instruction-------------------------------------------------- 5160 instruct bytes_reverse_int(rRegI dst) %{ 5161 match(Set dst (ReverseBytesI dst)); 5162 5163 format %{ "BSWAP $dst" %} 5164 opcode(0x0F, 0xC8); 5165 ins_encode( OpcP, OpcSReg(dst) ); 5166 ins_pipe( ialu_reg ); 5167 %} 5168 5169 instruct bytes_reverse_long(eRegL dst) %{ 5170 match(Set dst (ReverseBytesL dst)); 5171 5172 format %{ "BSWAP $dst.lo\n\t" 5173 "BSWAP $dst.hi\n\t" 5174 "XCHG $dst.lo $dst.hi" %} 5175 5176 ins_cost(125); 5177 ins_encode( bswap_long_bytes(dst) ); 5178 ins_pipe( ialu_reg_reg); 5179 %} 5180 5181 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5182 match(Set dst (ReverseBytesUS dst)); 5183 effect(KILL cr); 5184 5185 format %{ "BSWAP $dst\n\t" 5186 "SHR $dst,16\n\t" %} 5187 ins_encode %{ 5188 __ bswapl($dst$$Register); 5189 __ shrl($dst$$Register, 16); 5190 %} 5191 ins_pipe( ialu_reg ); 5192 %} 5193 5194 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5195 match(Set dst (ReverseBytesS dst)); 5196 effect(KILL cr); 5197 5198 format %{ "BSWAP $dst\n\t" 5199 "SAR $dst,16\n\t" %} 5200 ins_encode %{ 5201 __ bswapl($dst$$Register); 5202 __ sarl($dst$$Register, 16); 5203 %} 5204 ins_pipe( ialu_reg ); 5205 %} 5206 5207 5208 //---------- Zeros Count Instructions ------------------------------------------ 5209 5210 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5211 predicate(UseCountLeadingZerosInstruction); 5212 match(Set dst (CountLeadingZerosI src)); 5213 effect(KILL cr); 5214 5215 format %{ "LZCNT $dst, $src\t# count 
leading zeros (int)" %} 5216 ins_encode %{ 5217 __ lzcntl($dst$$Register, $src$$Register); 5218 %} 5219 ins_pipe(ialu_reg); 5220 %} 5221 5222 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5223 predicate(!UseCountLeadingZerosInstruction); 5224 match(Set dst (CountLeadingZerosI src)); 5225 effect(KILL cr); 5226 5227 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5228 "JNZ skip\n\t" 5229 "MOV $dst, -1\n" 5230 "skip:\n\t" 5231 "NEG $dst\n\t" 5232 "ADD $dst, 31" %} 5233 ins_encode %{ 5234 Register Rdst = $dst$$Register; 5235 Register Rsrc = $src$$Register; 5236 Label skip; 5237 __ bsrl(Rdst, Rsrc); 5238 __ jccb(Assembler::notZero, skip); 5239 __ movl(Rdst, -1); 5240 __ bind(skip); 5241 __ negl(Rdst); 5242 __ addl(Rdst, BitsPerInt - 1); 5243 %} 5244 ins_pipe(ialu_reg); 5245 %} 5246 5247 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5248 predicate(UseCountLeadingZerosInstruction); 5249 match(Set dst (CountLeadingZerosL src)); 5250 effect(TEMP dst, KILL cr); 5251 5252 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5253 "JNC done\n\t" 5254 "LZCNT $dst, $src.lo\n\t" 5255 "ADD $dst, 32\n" 5256 "done:" %} 5257 ins_encode %{ 5258 Register Rdst = $dst$$Register; 5259 Register Rsrc = $src$$Register; 5260 Label done; 5261 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5262 __ jccb(Assembler::carryClear, done); 5263 __ lzcntl(Rdst, Rsrc); 5264 __ addl(Rdst, BitsPerInt); 5265 __ bind(done); 5266 %} 5267 ins_pipe(ialu_reg); 5268 %} 5269 5270 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5271 predicate(!UseCountLeadingZerosInstruction); 5272 match(Set dst (CountLeadingZerosL src)); 5273 effect(TEMP dst, KILL cr); 5274 5275 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5276 "JZ msw_is_zero\n\t" 5277 "ADD $dst, 32\n\t" 5278 "JMP not_zero\n" 5279 "msw_is_zero:\n\t" 5280 "BSR $dst, $src.lo\n\t" 5281 "JNZ not_zero\n\t" 5282 "MOV $dst, -1\n" 5283 "not_zero:\n\t" 5284 "NEG 
$dst\n\t" 5285 "ADD $dst, 63\n" %} 5286 ins_encode %{ 5287 Register Rdst = $dst$$Register; 5288 Register Rsrc = $src$$Register; 5289 Label msw_is_zero; 5290 Label not_zero; 5291 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5292 __ jccb(Assembler::zero, msw_is_zero); 5293 __ addl(Rdst, BitsPerInt); 5294 __ jmpb(not_zero); 5295 __ bind(msw_is_zero); 5296 __ bsrl(Rdst, Rsrc); 5297 __ jccb(Assembler::notZero, not_zero); 5298 __ movl(Rdst, -1); 5299 __ bind(not_zero); 5300 __ negl(Rdst); 5301 __ addl(Rdst, BitsPerLong - 1); 5302 %} 5303 ins_pipe(ialu_reg); 5304 %} 5305 5306 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5307 predicate(UseCountTrailingZerosInstruction); 5308 match(Set dst (CountTrailingZerosI src)); 5309 effect(KILL cr); 5310 5311 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5312 ins_encode %{ 5313 __ tzcntl($dst$$Register, $src$$Register); 5314 %} 5315 ins_pipe(ialu_reg); 5316 %} 5317 5318 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5319 predicate(!UseCountTrailingZerosInstruction); 5320 match(Set dst (CountTrailingZerosI src)); 5321 effect(KILL cr); 5322 5323 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5324 "JNZ done\n\t" 5325 "MOV $dst, 32\n" 5326 "done:" %} 5327 ins_encode %{ 5328 Register Rdst = $dst$$Register; 5329 Label done; 5330 __ bsfl(Rdst, $src$$Register); 5331 __ jccb(Assembler::notZero, done); 5332 __ movl(Rdst, BitsPerInt); 5333 __ bind(done); 5334 %} 5335 ins_pipe(ialu_reg); 5336 %} 5337 5338 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5339 predicate(UseCountTrailingZerosInstruction); 5340 match(Set dst (CountTrailingZerosL src)); 5341 effect(TEMP dst, KILL cr); 5342 5343 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5344 "JNC done\n\t" 5345 "TZCNT $dst, $src.hi\n\t" 5346 "ADD $dst, 32\n" 5347 "done:" %} 5348 ins_encode %{ 5349 Register Rdst = $dst$$Register; 5350 Register Rsrc = $src$$Register; 5351 Label done; 5352 __ 
tzcntl(Rdst, Rsrc); 5353 __ jccb(Assembler::carryClear, done); 5354 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5355 __ addl(Rdst, BitsPerInt); 5356 __ bind(done); 5357 %} 5358 ins_pipe(ialu_reg); 5359 %} 5360 5361 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5362 predicate(!UseCountTrailingZerosInstruction); 5363 match(Set dst (CountTrailingZerosL src)); 5364 effect(TEMP dst, KILL cr); 5365 5366 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5367 "JNZ done\n\t" 5368 "BSF $dst, $src.hi\n\t" 5369 "JNZ msw_not_zero\n\t" 5370 "MOV $dst, 32\n" 5371 "msw_not_zero:\n\t" 5372 "ADD $dst, 32\n" 5373 "done:" %} 5374 ins_encode %{ 5375 Register Rdst = $dst$$Register; 5376 Register Rsrc = $src$$Register; 5377 Label msw_not_zero; 5378 Label done; 5379 __ bsfl(Rdst, Rsrc); 5380 __ jccb(Assembler::notZero, done); 5381 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5382 __ jccb(Assembler::notZero, msw_not_zero); 5383 __ movl(Rdst, BitsPerInt); 5384 __ bind(msw_not_zero); 5385 __ addl(Rdst, BitsPerInt); 5386 __ bind(done); 5387 %} 5388 ins_pipe(ialu_reg); 5389 %} 5390 5391 5392 //---------- Population Count Instructions ------------------------------------- 5393 5394 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5395 predicate(UsePopCountInstruction); 5396 match(Set dst (PopCountI src)); 5397 effect(KILL cr); 5398 5399 format %{ "POPCNT $dst, $src" %} 5400 ins_encode %{ 5401 __ popcntl($dst$$Register, $src$$Register); 5402 %} 5403 ins_pipe(ialu_reg); 5404 %} 5405 5406 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5407 predicate(UsePopCountInstruction); 5408 match(Set dst (PopCountI (LoadI mem))); 5409 effect(KILL cr); 5410 5411 format %{ "POPCNT $dst, $mem" %} 5412 ins_encode %{ 5413 __ popcntl($dst$$Register, $mem$$Address); 5414 %} 5415 ins_pipe(ialu_reg); 5416 %} 5417 5418 // Note: Long.bitCount(long) returns an int. 
// Long popcount on 32-bit: sum the popcounts of the two register halves.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// The two addresses are formed from the same base/index/scale with
// displacements disp and disp+4 for the low and high 32-bit words.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // MOVSX8 already sign-extended bits 31..7, so an arithmetic shift by 7
    // (rather than 31) suffices to fill the high word with the sign bit.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
// Only the low 8 bits of the mask can matter after a zero-extending byte
// load, so the immediate is truncated with right_n_bits(8).
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// Matches the (x << 24) >> 24 narrowing idiom and folds it into one MOVSX.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // MOVSX already sign-extended bits 31..15, so shifting by 15 fills the
    // high word with the sign bit.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// The 0xFF mask lets the 16-bit load be narrowed to a zero-extending byte load.
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
// Only the low 16 bits of the mask can matter after a zero-extending load,
// so the immediate is truncated with right_n_bits(16).
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
// A 31-bit mask guarantees a non-negative result, so the high word is
// simply zeroed instead of sign-extended.
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
// Non-atomic case only: two 32-bit loads at disp and disp+4.
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
// Atomic 64-bit load via the x87 FPU when SSE2 is unavailable: a single
// FILD/FISTP pair moves the 8 bytes indivisibly through the FPU stack.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic 64-bit load via an XMM register (SSE2+), result on the stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic 64-bit load via an XMM register (SSE2+), result split into the
// two halves of a GP register pair with MOVD + PSRLQ.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Float
// Register-register copy from the general float class into the legacy
// (non-extended) XMM class; MOVSS is a no-op when src == dst.
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
// Register-register copy from the legacy XMM class back into the general
// float class.
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
// Register-register copy from the general double class into the legacy
// XMM class.
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
// Register-register copy from the legacy XMM class back into the general
// double class.
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double (x87 FPU path when SSE2 doubles are not in use)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM (MOVSD clears the upper half of the XMM register)
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Variant selected when UseXmmLoadAndClearUpper is off (MOVLPD form).
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 FPU path when SSE is disabled entirely)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero (XOR is shorter than MOV imm but clobbers flags)
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant: one MOV imm32 per 32-bit half.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load long zero: XOR both halves (clobbers flags).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
6112 instruct loadConFPR(regFPR dst, immFPR con) %{ 6113 match(Set dst con); 6114 ins_cost(125); 6115 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 6116 "FSTP $dst" %} 6117 ins_encode %{ 6118 __ fld_s($constantaddress($con)); 6119 __ fstp_d($dst$$reg); 6120 %} 6121 ins_pipe(fpu_reg_con); 6122 %} 6123 6124 // The instruction usage is guarded by predicate in operand immFPR0(). 6125 instruct loadConFPR0(regFPR dst, immFPR0 con) %{ 6126 match(Set dst con); 6127 ins_cost(125); 6128 format %{ "FLDZ ST\n\t" 6129 "FSTP $dst" %} 6130 ins_encode %{ 6131 __ fldz(); 6132 __ fstp_d($dst$$reg); 6133 %} 6134 ins_pipe(fpu_reg_con); 6135 %} 6136 6137 // The instruction usage is guarded by predicate in operand immFPR1(). 6138 instruct loadConFPR1(regFPR dst, immFPR1 con) %{ 6139 match(Set dst con); 6140 ins_cost(125); 6141 format %{ "FLD1 ST\n\t" 6142 "FSTP $dst" %} 6143 ins_encode %{ 6144 __ fld1(); 6145 __ fstp_d($dst$$reg); 6146 %} 6147 ins_pipe(fpu_reg_con); 6148 %} 6149 6150 // The instruction usage is guarded by predicate in operand immF(). 6151 instruct loadConF(regF dst, immF con) %{ 6152 match(Set dst con); 6153 ins_cost(125); 6154 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 6155 ins_encode %{ 6156 __ movflt($dst$$XMMRegister, $constantaddress($con)); 6157 %} 6158 ins_pipe(pipe_slow); 6159 %} 6160 6161 // The instruction usage is guarded by predicate in operand immF0(). 6162 instruct loadConF0(regF dst, immF0 src) %{ 6163 match(Set dst src); 6164 ins_cost(100); 6165 format %{ "XORPS $dst,$dst\t# float 0.0" %} 6166 ins_encode %{ 6167 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 6168 %} 6169 ins_pipe(pipe_slow); 6170 %} 6171 6172 // The instruction usage is guarded by predicate in operand immDPR(). 
6173 instruct loadConDPR(regDPR dst, immDPR con) %{ 6174 match(Set dst con); 6175 ins_cost(125); 6176 6177 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 6178 "FSTP $dst" %} 6179 ins_encode %{ 6180 __ fld_d($constantaddress($con)); 6181 __ fstp_d($dst$$reg); 6182 %} 6183 ins_pipe(fpu_reg_con); 6184 %} 6185 6186 // The instruction usage is guarded by predicate in operand immDPR0(). 6187 instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 6188 match(Set dst con); 6189 ins_cost(125); 6190 6191 format %{ "FLDZ ST\n\t" 6192 "FSTP $dst" %} 6193 ins_encode %{ 6194 __ fldz(); 6195 __ fstp_d($dst$$reg); 6196 %} 6197 ins_pipe(fpu_reg_con); 6198 %} 6199 6200 // The instruction usage is guarded by predicate in operand immDPR1(). 6201 instruct loadConDPR1(regDPR dst, immDPR1 con) %{ 6202 match(Set dst con); 6203 ins_cost(125); 6204 6205 format %{ "FLD1 ST\n\t" 6206 "FSTP $dst" %} 6207 ins_encode %{ 6208 __ fld1(); 6209 __ fstp_d($dst$$reg); 6210 %} 6211 ins_pipe(fpu_reg_con); 6212 %} 6213 6214 // The instruction usage is guarded by predicate in operand immD(). 6215 instruct loadConD(regD dst, immD con) %{ 6216 match(Set dst con); 6217 ins_cost(125); 6218 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 6219 ins_encode %{ 6220 __ movdbl($dst$$XMMRegister, $constantaddress($con)); 6221 %} 6222 ins_pipe(pipe_slow); 6223 %} 6224 6225 // The instruction usage is guarded by predicate in operand immD0(). 
6226 instruct loadConD0(regD dst, immD0 src) %{ 6227 match(Set dst src); 6228 ins_cost(100); 6229 format %{ "XORPD $dst,$dst\t# double 0.0" %} 6230 ins_encode %{ 6231 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); 6232 %} 6233 ins_pipe( pipe_slow ); 6234 %} 6235 6236 // Load Stack Slot 6237 instruct loadSSI(rRegI dst, stackSlotI src) %{ 6238 match(Set dst src); 6239 ins_cost(125); 6240 6241 format %{ "MOV $dst,$src" %} 6242 opcode(0x8B); 6243 ins_encode( OpcP, RegMem(dst,src)); 6244 ins_pipe( ialu_reg_mem ); 6245 %} 6246 6247 instruct loadSSL(eRegL dst, stackSlotL src) %{ 6248 match(Set dst src); 6249 6250 ins_cost(200); 6251 format %{ "MOV $dst,$src.lo\n\t" 6252 "MOV $dst+4,$src.hi" %} 6253 opcode(0x8B, 0x8B); 6254 ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) ); 6255 ins_pipe( ialu_mem_long_reg ); 6256 %} 6257 6258 // Load Stack Slot 6259 instruct loadSSP(eRegP dst, stackSlotP src) %{ 6260 match(Set dst src); 6261 ins_cost(125); 6262 6263 format %{ "MOV $dst,$src" %} 6264 opcode(0x8B); 6265 ins_encode( OpcP, RegMem(dst,src)); 6266 ins_pipe( ialu_reg_mem ); 6267 %} 6268 6269 // Load Stack Slot 6270 instruct loadSSF(regFPR dst, stackSlotF src) %{ 6271 match(Set dst src); 6272 ins_cost(125); 6273 6274 format %{ "FLD_S $src\n\t" 6275 "FSTP $dst" %} 6276 opcode(0xD9); /* D9 /0, FLD m32real */ 6277 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6278 Pop_Reg_FPR(dst) ); 6279 ins_pipe( fpu_reg_mem ); 6280 %} 6281 6282 // Load Stack Slot 6283 instruct loadSSD(regDPR dst, stackSlotD src) %{ 6284 match(Set dst src); 6285 ins_cost(125); 6286 6287 format %{ "FLD_D $src\n\t" 6288 "FSTP $dst" %} 6289 opcode(0xDD); /* DD /0, FLD m64real */ 6290 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6291 Pop_Reg_DPR(dst) ); 6292 ins_pipe( fpu_reg_mem ); 6293 %} 6294 6295 // Prefetch instructions for allocation. 6296 // Must be safe to execute with invalid address (cannot fault). 

// Allocation prefetch is a no-op without SSE (unless PREFETCHW was requested).
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// AllocatePrefetchInstr==3 selects PREFETCHW.
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// AllocatePrefetchInstr==0 selects PREFETCHNTA (requires SSE).
instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// AllocatePrefetchInstr==1 selects PREFETCHT0 (requires SSE).
instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// AllocatePrefetchInstr==2 selects PREFETCHT2 (requires SSE).
instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long (non-atomic: two 32-bit moves).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.
// Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long store via a 64-bit XMM move (SSE2); the CMP probes
// the target address up front for the implicit null check.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long store from a GPR pair: assemble the 64-bit value in
// an XMM register (MOVD lo, MOVD hi, PUNPCKLDQ) and store it with one MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double from the x87 top-of-stack (no SSE2).
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Double register-register move into a vector-legal register (reg-reg only).
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Double register-register move out of a vector-legal register.
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Float register-register move into a vector-legal register (reg-reg only).
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Float register-register move out of a vector-legal register.
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store Float from the x87 top-of-stack (no SSE).
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier: a locked read-modify-write of the stack top.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elide the barrier when a prior locked instruction already provides it.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// CastX2P: both operands constrained to EAX, so no code is emitted.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move emulated with a short branch when CMOV is unsupported.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned variant of the CMOV emulation above.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note:  a CMoveP is generated for  stubs and native wrappers
//        regardless of whether we are on a P6, so we
//        emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move of a double using the x87 FCMOV (unsigned condition codes).
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move of a float using the x87 FCMOV (unsigned condition codes).
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed compares therefore fall back to a branch around an FP register copy.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7023 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ 7024 predicate(UseSSE==0); 7025 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7026 ins_cost(200); 7027 format %{ "Jn$cop skip\n\t" 7028 "MOV $dst,$src\t# float\n" 7029 "skip:" %} 7030 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 7031 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); 7032 ins_pipe( pipe_cmovDPR_reg ); 7033 %} 7034 7035 // No CMOVE with SSE/SSE2 7036 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 7037 predicate (UseSSE>=1); 7038 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7039 ins_cost(200); 7040 format %{ "Jn$cop skip\n\t" 7041 "MOVSS $dst,$src\t# float\n" 7042 "skip:" %} 7043 ins_encode %{ 7044 Label skip; 7045 // Invert sense of branch from sense of CMOV 7046 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 7047 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7048 __ bind(skip); 7049 %} 7050 ins_pipe( pipe_slow ); 7051 %} 7052 7053 // No CMOVE with SSE/SSE2 7054 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 7055 predicate (UseSSE>=2); 7056 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7057 ins_cost(200); 7058 format %{ "Jn$cop skip\n\t" 7059 "MOVSD $dst,$src\t# float\n" 7060 "skip:" %} 7061 ins_encode %{ 7062 Label skip; 7063 // Invert sense of branch from sense of CMOV 7064 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 7065 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 7066 __ bind(skip); 7067 %} 7068 ins_pipe( pipe_slow ); 7069 %} 7070 7071 // unsigned version 7072 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 7073 predicate (UseSSE>=1); 7074 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7075 ins_cost(200); 7076 format %{ "Jn$cop skip\n\t" 7077 "MOVSS $dst,$src\t# float\n" 7078 "skip:" %} 7079 ins_encode %{ 7080 Label skip; 7081 // Invert sense of branch from sense of CMOV 7082 __ 
jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 7083 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7084 __ bind(skip); 7085 %} 7086 ins_pipe( pipe_slow ); 7087 %} 7088 7089 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 7090 predicate (UseSSE>=1); 7091 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7092 ins_cost(200); 7093 expand %{ 7094 fcmovF_regU(cop, cr, dst, src); 7095 %} 7096 %} 7097 7098 // unsigned version 7099 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 7100 predicate (UseSSE>=2); 7101 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7102 ins_cost(200); 7103 format %{ "Jn$cop skip\n\t" 7104 "MOVSD $dst,$src\t# float\n" 7105 "skip:" %} 7106 ins_encode %{ 7107 Label skip; 7108 // Invert sense of branch from sense of CMOV 7109 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 7110 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 7111 __ bind(skip); 7112 %} 7113 ins_pipe( pipe_slow ); 7114 %} 7115 7116 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 7117 predicate (UseSSE>=2); 7118 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7119 ins_cost(200); 7120 expand %{ 7121 fcmovD_regU(cop, cr, dst, src); 7122 %} 7123 %} 7124 7125 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ 7126 predicate(VM_Version::supports_cmov() ); 7127 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7128 ins_cost(200); 7129 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 7130 "CMOV$cop $dst.hi,$src.hi" %} 7131 opcode(0x0F,0x40); 7132 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7133 ins_pipe( pipe_cmov_reg_long ); 7134 %} 7135 7136 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ 7137 predicate(VM_Version::supports_cmov() ); 7138 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7139 ins_cost(200); 7140 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 7141 "CMOV$cop 
$dst.hi,$src.hi" %} 7142 opcode(0x0F,0x40); 7143 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7144 ins_pipe( pipe_cmov_reg_long ); 7145 %} 7146 7147 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ 7148 predicate(VM_Version::supports_cmov() ); 7149 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7150 ins_cost(200); 7151 expand %{ 7152 cmovL_regU(cop, cr, dst, src); 7153 %} 7154 %} 7155 7156 //----------Arithmetic Instructions-------------------------------------------- 7157 //----------Addition Instructions---------------------------------------------- 7158 7159 // Integer Addition Instructions 7160 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7161 match(Set dst (AddI dst src)); 7162 effect(KILL cr); 7163 7164 size(2); 7165 format %{ "ADD $dst,$src" %} 7166 opcode(0x03); 7167 ins_encode( OpcP, RegReg( dst, src) ); 7168 ins_pipe( ialu_reg_reg ); 7169 %} 7170 7171 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7172 match(Set dst (AddI dst src)); 7173 effect(KILL cr); 7174 7175 format %{ "ADD $dst,$src" %} 7176 opcode(0x81, 0x00); /* /0 id */ 7177 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7178 ins_pipe( ialu_reg ); 7179 %} 7180 7181 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{ 7182 predicate(UseIncDec); 7183 match(Set dst (AddI dst src)); 7184 effect(KILL cr); 7185 7186 size(1); 7187 format %{ "INC $dst" %} 7188 opcode(0x40); /* */ 7189 ins_encode( Opc_plus( primary, dst ) ); 7190 ins_pipe( ialu_reg ); 7191 %} 7192 7193 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 7194 match(Set dst (AddI src0 src1)); 7195 ins_cost(110); 7196 7197 format %{ "LEA $dst,[$src0 + $src1]" %} 7198 opcode(0x8D); /* 0x8D /r */ 7199 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7200 ins_pipe( ialu_reg_reg ); 7201 %} 7202 7203 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 7204 match(Set dst (AddP src0 src1)); 7205 ins_cost(110); 
7206 7207 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 7208 opcode(0x8D); /* 0x8D /r */ 7209 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7210 ins_pipe( ialu_reg_reg ); 7211 %} 7212 7213 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 7214 predicate(UseIncDec); 7215 match(Set dst (AddI dst src)); 7216 effect(KILL cr); 7217 7218 size(1); 7219 format %{ "DEC $dst" %} 7220 opcode(0x48); /* */ 7221 ins_encode( Opc_plus( primary, dst ) ); 7222 ins_pipe( ialu_reg ); 7223 %} 7224 7225 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 7226 match(Set dst (AddP dst src)); 7227 effect(KILL cr); 7228 7229 size(2); 7230 format %{ "ADD $dst,$src" %} 7231 opcode(0x03); 7232 ins_encode( OpcP, RegReg( dst, src) ); 7233 ins_pipe( ialu_reg_reg ); 7234 %} 7235 7236 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 7237 match(Set dst (AddP dst src)); 7238 effect(KILL cr); 7239 7240 format %{ "ADD $dst,$src" %} 7241 opcode(0x81,0x00); /* Opcode 81 /0 id */ 7242 // ins_encode( RegImm( dst, src) ); 7243 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7244 ins_pipe( ialu_reg ); 7245 %} 7246 7247 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7248 match(Set dst (AddI dst (LoadI src))); 7249 effect(KILL cr); 7250 7251 ins_cost(125); 7252 format %{ "ADD $dst,$src" %} 7253 opcode(0x03); 7254 ins_encode( OpcP, RegMem( dst, src) ); 7255 ins_pipe( ialu_reg_mem ); 7256 %} 7257 7258 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7259 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7260 effect(KILL cr); 7261 7262 ins_cost(150); 7263 format %{ "ADD $dst,$src" %} 7264 opcode(0x01); /* Opcode 01 /r */ 7265 ins_encode( OpcP, RegMem( src, dst ) ); 7266 ins_pipe( ialu_mem_reg ); 7267 %} 7268 7269 // Add Memory with Immediate 7270 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7271 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7272 effect(KILL cr); 7273 7274 ins_cost(125); 7275 format %{ "ADD $dst,$src" %} 7276 
opcode(0x81); /* Opcode 81 /0 id */ 7277 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7278 ins_pipe( ialu_mem_imm ); 7279 %} 7280 7281 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{ 7282 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7283 effect(KILL cr); 7284 7285 ins_cost(125); 7286 format %{ "INC $dst" %} 7287 opcode(0xFF); /* Opcode FF /0 */ 7288 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7289 ins_pipe( ialu_mem_imm ); 7290 %} 7291 7292 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7293 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7294 effect(KILL cr); 7295 7296 ins_cost(125); 7297 format %{ "DEC $dst" %} 7298 opcode(0xFF); /* Opcode FF /1 */ 7299 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7300 ins_pipe( ialu_mem_imm ); 7301 %} 7302 7303 7304 instruct checkCastPP( eRegP dst ) %{ 7305 match(Set dst (CheckCastPP dst)); 7306 7307 size(0); 7308 format %{ "#checkcastPP of $dst" %} 7309 ins_encode( /*empty encoding*/ ); 7310 ins_pipe( empty ); 7311 %} 7312 7313 instruct castPP( eRegP dst ) %{ 7314 match(Set dst (CastPP dst)); 7315 format %{ "#castPP of $dst" %} 7316 ins_encode( /*empty encoding*/ ); 7317 ins_pipe( empty ); 7318 %} 7319 7320 instruct castII( rRegI dst ) %{ 7321 match(Set dst (CastII dst)); 7322 format %{ "#castII of $dst" %} 7323 ins_encode( /*empty encoding*/ ); 7324 ins_cost(0); 7325 ins_pipe( empty ); 7326 %} 7327 7328 // Load-locked - same as a regular pointer load when used with compare-swap 7329 instruct loadPLocked(eRegP dst, memory mem) %{ 7330 match(Set dst (LoadPLocked mem)); 7331 7332 ins_cost(125); 7333 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7334 opcode(0x8B); 7335 ins_encode( OpcP, RegMem(dst,mem)); 7336 ins_pipe( ialu_reg_mem ); 7337 %} 7338 7339 // Conditional-store of the updated heap-top. 7340 // Used during allocation of the shared heap. 7341 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 
// Conditionally store newval at heap_top_ptr iff EAX still holds the old top;
// result is reported through the flags (EQ on success), not a register.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  // 0F B1 /r = CMPXCHG r/m32,r32 with a LOCK prefix.
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  // CMPXCHG overwrites EAX with the memory value on failure.
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS. CMPXCHG8B requires EDX:EAX (expected) and ECX:EBX (new), so
// mem_ptr is pinned to ESI to keep those four registers free.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS: expected value in EAX, new value in ECX; res <- 0/1 from ZF.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte CAS (CMPXCHGB).
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Short CAS (CMPXCHGW).
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int CAS.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange variants return the witnessed value instead of a boolean:
// CMPXCHG leaves it in oldval's register(s), so oldval is both input and result.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddB whose result is unused: a locked ADD is cheaper than XADD.
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
7508 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{ 7509 match(Set newval (GetAndAddB mem newval)); 7510 effect(KILL cr); 7511 format %{ "XADDB [$mem],$newval" %} 7512 ins_encode %{ 7513 __ lock(); 7514 __ xaddb($mem$$Address, $newval$$Register); 7515 %} 7516 ins_pipe( pipe_cmpxchg ); 7517 %} 7518 7519 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7520 predicate(n->as_LoadStore()->result_not_used()); 7521 match(Set dummy (GetAndAddS mem add)); 7522 effect(KILL cr); 7523 format %{ "ADDS [$mem],$add" %} 7524 ins_encode %{ 7525 __ lock(); 7526 __ addw($mem$$Address, $add$$constant); 7527 %} 7528 ins_pipe( pipe_cmpxchg ); 7529 %} 7530 7531 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{ 7532 match(Set newval (GetAndAddS mem newval)); 7533 effect(KILL cr); 7534 format %{ "XADDS [$mem],$newval" %} 7535 ins_encode %{ 7536 __ lock(); 7537 __ xaddw($mem$$Address, $newval$$Register); 7538 %} 7539 ins_pipe( pipe_cmpxchg ); 7540 %} 7541 7542 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7543 predicate(n->as_LoadStore()->result_not_used()); 7544 match(Set dummy (GetAndAddI mem add)); 7545 effect(KILL cr); 7546 format %{ "ADDL [$mem],$add" %} 7547 ins_encode %{ 7548 __ lock(); 7549 __ addl($mem$$Address, $add$$constant); 7550 %} 7551 ins_pipe( pipe_cmpxchg ); 7552 %} 7553 7554 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7555 match(Set newval (GetAndAddI mem newval)); 7556 effect(KILL cr); 7557 format %{ "XADDL [$mem],$newval" %} 7558 ins_encode %{ 7559 __ lock(); 7560 __ xaddl($mem$$Address, $newval$$Register); 7561 %} 7562 ins_pipe( pipe_cmpxchg ); 7563 %} 7564 7565 // Important to match to xRegI: only 8-bit regs. 
// Atomic exchange of a byte (XCHG is implicitly locked on x86).
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write subtract directly in memory.
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// 0 - x is a single NEG.
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into EAX only (the low half of EDX:EAX); feeds the
// multiply-high patterns below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // The predicate walks the matched subtree to require that src2 is a long
  // constant that fits in 32 bits.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// dst*src1 + src2*src3, expanded into two multiplies and an add.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  // hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// Guards the min_jint / -1 case, which would trap in IDIV with #DE overflow.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// Falls back to the SharedRuntime::ldiv runtime stub.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// Falls back to the SharedRuntime::lrem runtime stub.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
// Long division by a 32-bit constant, done inline with the unsigned 32-bit
// DIV instruction.  The dividend arrives in EDX:EAX (dst), the divisor's
// absolute value goes into $tmp.  Fast path: when |divisor| > hi word the
// quotient fits in 32 bits and a single DIV suffices.  Otherwise a negative
// dividend is negated first so unsigned division can be used, and the
// quotient is re-negated at the end; a final lneg applies the divisor's sign.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    // divisor == 0 / -1 / min_jint are matched by other rules or runtime calls
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through for final divide; tmp2 has the 32-bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
// Companion to divL_eReg_imm32: same unsigned-division scheme, but only the
// remainder (always < |divisor|, hence 32 bits) is kept.  The remainder
// takes the sign of the dividend; the final SAR sign-extends it into EDX.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    // divisor == 0 / -1 / min_jint are matched by other rules or runtime calls
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
// Uses the short one-byte-shorter D1-form (implicit count of 1).
instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
// Variable count must be in CL (eCXRegI), as the D3-form requires.
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
// Memory form: read-modify-write directly on the memory operand.
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
// Memory form: read-modify-write directly on the memory operand.
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
// Variable count must be in CL (eCXRegI), as the D3-form requires.
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Matched as a single sign-extending byte move (MOVSX); src must be a
// register with a byte-addressable low part (xRegI).
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
// Matched as a single sign-extending word move (MOVSX).
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
// Variable count must be in CL (eCXRegI), as the D3-form requires.
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
// OpcSErm chooses the sign-extended 8-bit immediate form when it fits.
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
// Read-modify-write form; note the swapped operand order for opcode 0x21.
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN dst = ~src1 & src2, matched from the (src1 ^ -1) & src2 idiom.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from the (0 - src) & src idiom.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to lowest set bit, matched from (src + -1) ^ src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matched from (src + -1) & src.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Same encoding, but the source is a pointer reinterpreted as an int.
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
// Read-modify-write form; note the swapped operand order for opcode 0x09.
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// These helper instructs carry no match rule; they are only reachable via
// the expand blocks of the rotate matchers below.
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Only matches when left + right shift counts sum to 32 (mod 32), i.e. the
// two shifts together form a genuine rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift)
(URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
// Match-less helpers, reachable only through the expand blocks of the
// rotate-right matchers below.
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Only matches when the two shift counts sum to 32 (mod 32), i.e. the
// shift pair forms a genuine rotate.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// XOR with all-ones is a bitwise complement; emitted as NOT, which (unlike
// XOR) leaves the flags untouched, so no eFlagsReg effect is needed.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
// Read-modify-write form; note the swapped operand order for opcode 0x31.
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------
// Conv2B is expanded as MOV + (NEG; ADC): negating sets carry iff the value
// was non-zero, and adding the original value plus carry yields exactly 1
// for any non-zero input and 0 for zero.

instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer flavor of the same expansion.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// Branch-free compute of dst = (p < q) ? -1 : 0.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // No labels needed: SETcc + NEG make the sequence branch-free.
    // (The previously declared "Label done" was never bound or referenced
    // and has been removed.)
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// Comparing against zero reduces to an arithmetic shift: SAR by 31
// replicates the sign bit, giving -1 for negative input and 0 otherwise.
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// p = (p < q) ? p - q + y : p - q, i.e. conditionally add y after the
// subtract, using the sign produced by SUB instead of a mask register.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// y = (p < q) ? y : 0 — keep y when the mask would be all-ones, otherwise
// clear it, again trading the mask materialization for a short branch.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
   instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These produce only a flags result (the OF flag), consumed by overflow
// branch nodes; the destructive forms therefore USE_KILL their input.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// CMP sets OF exactly like SUB would, without destroying op1.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 0 - x overflow check via NEG (destroys op2, hence USE_KILL).
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Three-operand IMUL into a temp keeps both inputs intact.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer Absolute Instructions
// Branch-free abs: tmp = sign mask (src >> 31), then (src ^ mask) - mask.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
  %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

//----------Long Instructions------------------------------------------------
// 64-bit ALU ops are synthesized from lo/hi 32-bit register pairs; the
// carry/borrow between halves is threaded through ADC/SBB.

// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long (0 - dst); neg_long emits the three-instruction sequence
// shown in the format string.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long ANDN: one ANDNL per 32-bit half.  TEMP dst prevents the allocator
// from overlapping dst with the sources, since dst.lo is written before
// the hi halves of the sources are read.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2,
immL_M1 minus_1, eFlagsReg cr) %{ 9118 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 9119 predicate(UseBMI1Instructions); 9120 effect(KILL cr, TEMP dst); 9121 9122 ins_cost(125); 9123 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 9124 "ANDNL $dst.hi, $src1.hi, $src2+4" 9125 %} 9126 9127 ins_encode %{ 9128 Register Rdst = $dst$$Register; 9129 Register Rsrc1 = $src1$$Register; 9130 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 9131 9132 __ andnl(Rdst, Rsrc1, $src2$$Address); 9133 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); 9134 %} 9135 ins_pipe(ialu_reg_mem); 9136 %} 9137 9138 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 9139 match(Set dst (AndL (SubL imm_zero src) src)); 9140 predicate(UseBMI1Instructions); 9141 effect(KILL cr, TEMP dst); 9142 9143 format %{ "MOVL $dst.hi, 0\n\t" 9144 "BLSIL $dst.lo, $src.lo\n\t" 9145 "JNZ done\n\t" 9146 "BLSIL $dst.hi, $src.hi\n" 9147 "done:" 9148 %} 9149 9150 ins_encode %{ 9151 Label done; 9152 Register Rdst = $dst$$Register; 9153 Register Rsrc = $src$$Register; 9154 __ movl(HIGH_FROM_LOW(Rdst), 0); 9155 __ blsil(Rdst, Rsrc); 9156 __ jccb(Assembler::notZero, done); 9157 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9158 __ bind(done); 9159 %} 9160 ins_pipe(ialu_reg); 9161 %} 9162 9163 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 9164 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 9165 predicate(UseBMI1Instructions); 9166 effect(KILL cr, TEMP dst); 9167 9168 ins_cost(125); 9169 format %{ "MOVL $dst.hi, 0\n\t" 9170 "BLSIL $dst.lo, $src\n\t" 9171 "JNZ done\n\t" 9172 "BLSIL $dst.hi, $src+4\n" 9173 "done:" 9174 %} 9175 9176 ins_encode %{ 9177 Label done; 9178 Register Rdst = $dst$$Register; 9179 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9180 9181 __ movl(HIGH_FROM_LOW(Rdst), 0); 9182 
__ blsil(Rdst, $src$$Address); 9183 __ jccb(Assembler::notZero, done); 9184 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 9185 __ bind(done); 9186 %} 9187 ins_pipe(ialu_reg_mem); 9188 %} 9189 9190 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9191 %{ 9192 match(Set dst (XorL (AddL src minus_1) src)); 9193 predicate(UseBMI1Instructions); 9194 effect(KILL cr, TEMP dst); 9195 9196 format %{ "MOVL $dst.hi, 0\n\t" 9197 "BLSMSKL $dst.lo, $src.lo\n\t" 9198 "JNC done\n\t" 9199 "BLSMSKL $dst.hi, $src.hi\n" 9200 "done:" 9201 %} 9202 9203 ins_encode %{ 9204 Label done; 9205 Register Rdst = $dst$$Register; 9206 Register Rsrc = $src$$Register; 9207 __ movl(HIGH_FROM_LOW(Rdst), 0); 9208 __ blsmskl(Rdst, Rsrc); 9209 __ jccb(Assembler::carryClear, done); 9210 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9211 __ bind(done); 9212 %} 9213 9214 ins_pipe(ialu_reg); 9215 %} 9216 9217 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9218 %{ 9219 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 9220 predicate(UseBMI1Instructions); 9221 effect(KILL cr, TEMP dst); 9222 9223 ins_cost(125); 9224 format %{ "MOVL $dst.hi, 0\n\t" 9225 "BLSMSKL $dst.lo, $src\n\t" 9226 "JNC done\n\t" 9227 "BLSMSKL $dst.hi, $src+4\n" 9228 "done:" 9229 %} 9230 9231 ins_encode %{ 9232 Label done; 9233 Register Rdst = $dst$$Register; 9234 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9235 9236 __ movl(HIGH_FROM_LOW(Rdst), 0); 9237 __ blsmskl(Rdst, $src$$Address); 9238 __ jccb(Assembler::carryClear, done); 9239 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 9240 __ bind(done); 9241 %} 9242 9243 ins_pipe(ialu_reg_mem); 9244 %} 9245 9246 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9247 %{ 9248 match(Set dst (AndL (AddL src minus_1) src) ); 9249 predicate(UseBMI1Instructions); 9250 effect(KILL cr, TEMP dst); 9251 9252 format %{ "MOVL $dst.hi, $src.hi\n\t" 
9253 "BLSRL $dst.lo, $src.lo\n\t" 9254 "JNC done\n\t" 9255 "BLSRL $dst.hi, $src.hi\n" 9256 "done:" 9257 %} 9258 9259 ins_encode %{ 9260 Label done; 9261 Register Rdst = $dst$$Register; 9262 Register Rsrc = $src$$Register; 9263 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9264 __ blsrl(Rdst, Rsrc); 9265 __ jccb(Assembler::carryClear, done); 9266 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9267 __ bind(done); 9268 %} 9269 9270 ins_pipe(ialu_reg); 9271 %} 9272 9273 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9274 %{ 9275 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 9276 predicate(UseBMI1Instructions); 9277 effect(KILL cr, TEMP dst); 9278 9279 ins_cost(125); 9280 format %{ "MOVL $dst.hi, $src+4\n\t" 9281 "BLSRL $dst.lo, $src\n\t" 9282 "JNC done\n\t" 9283 "BLSRL $dst.hi, $src+4\n" 9284 "done:" 9285 %} 9286 9287 ins_encode %{ 9288 Label done; 9289 Register Rdst = $dst$$Register; 9290 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9291 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9292 __ blsrl(Rdst, $src$$Address); 9293 __ jccb(Assembler::carryClear, done); 9294 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9295 __ bind(done); 9296 %} 9297 9298 ins_pipe(ialu_reg_mem); 9299 %} 9300 9301 // Or Long Register with Register 9302 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9303 match(Set dst (OrL dst src)); 9304 effect(KILL cr); 9305 format %{ "OR $dst.lo,$src.lo\n\t" 9306 "OR $dst.hi,$src.hi" %} 9307 opcode(0x0B,0x0B); 9308 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9309 ins_pipe( ialu_reg_reg_long ); 9310 %} 9311 9312 // Or Long Register with Immediate 9313 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9314 match(Set dst (OrL dst src)); 9315 effect(KILL cr); 9316 format %{ "OR $dst.lo,$src.lo\n\t" 9317 "OR $dst.hi,$src.hi" %} 9318 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9319 ins_encode( Long_OpcSErm_Lo( dst, src ), 
Long_OpcSErm_Hi( dst, src ) ); 9320 ins_pipe( ialu_reg_long ); 9321 %} 9322 9323 // Or Long Register with Memory 9324 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9325 match(Set dst (OrL dst (LoadL mem))); 9326 effect(KILL cr); 9327 ins_cost(125); 9328 format %{ "OR $dst.lo,$mem\n\t" 9329 "OR $dst.hi,$mem+4" %} 9330 opcode(0x0B,0x0B); 9331 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9332 ins_pipe( ialu_reg_long_mem ); 9333 %} 9334 9335 // Xor Long Register with Register 9336 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9337 match(Set dst (XorL dst src)); 9338 effect(KILL cr); 9339 format %{ "XOR $dst.lo,$src.lo\n\t" 9340 "XOR $dst.hi,$src.hi" %} 9341 opcode(0x33,0x33); 9342 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9343 ins_pipe( ialu_reg_reg_long ); 9344 %} 9345 9346 // Xor Long Register with Immediate -1 9347 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9348 match(Set dst (XorL dst imm)); 9349 format %{ "NOT $dst.lo\n\t" 9350 "NOT $dst.hi" %} 9351 ins_encode %{ 9352 __ notl($dst$$Register); 9353 __ notl(HIGH_FROM_LOW($dst$$Register)); 9354 %} 9355 ins_pipe( ialu_reg_long ); 9356 %} 9357 9358 // Xor Long Register with Immediate 9359 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9360 match(Set dst (XorL dst src)); 9361 effect(KILL cr); 9362 format %{ "XOR $dst.lo,$src.lo\n\t" 9363 "XOR $dst.hi,$src.hi" %} 9364 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9365 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9366 ins_pipe( ialu_reg_long ); 9367 %} 9368 9369 // Xor Long Register with Memory 9370 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9371 match(Set dst (XorL dst (LoadL mem))); 9372 effect(KILL cr); 9373 ins_cost(125); 9374 format %{ "XOR $dst.lo,$mem\n\t" 9375 "XOR $dst.hi,$mem+4" %} 9376 opcode(0x33,0x33); 9377 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9378 ins_pipe( ialu_reg_long_mem ); 
9379 %} 9380 9381 // Shift Left Long by 1 9382 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9383 predicate(UseNewLongLShift); 9384 match(Set dst (LShiftL dst cnt)); 9385 effect(KILL cr); 9386 ins_cost(100); 9387 format %{ "ADD $dst.lo,$dst.lo\n\t" 9388 "ADC $dst.hi,$dst.hi" %} 9389 ins_encode %{ 9390 __ addl($dst$$Register,$dst$$Register); 9391 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9392 %} 9393 ins_pipe( ialu_reg_long ); 9394 %} 9395 9396 // Shift Left Long by 2 9397 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9398 predicate(UseNewLongLShift); 9399 match(Set dst (LShiftL dst cnt)); 9400 effect(KILL cr); 9401 ins_cost(100); 9402 format %{ "ADD $dst.lo,$dst.lo\n\t" 9403 "ADC $dst.hi,$dst.hi\n\t" 9404 "ADD $dst.lo,$dst.lo\n\t" 9405 "ADC $dst.hi,$dst.hi" %} 9406 ins_encode %{ 9407 __ addl($dst$$Register,$dst$$Register); 9408 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9409 __ addl($dst$$Register,$dst$$Register); 9410 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9411 %} 9412 ins_pipe( ialu_reg_long ); 9413 %} 9414 9415 // Shift Left Long by 3 9416 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9417 predicate(UseNewLongLShift); 9418 match(Set dst (LShiftL dst cnt)); 9419 effect(KILL cr); 9420 ins_cost(100); 9421 format %{ "ADD $dst.lo,$dst.lo\n\t" 9422 "ADC $dst.hi,$dst.hi\n\t" 9423 "ADD $dst.lo,$dst.lo\n\t" 9424 "ADC $dst.hi,$dst.hi\n\t" 9425 "ADD $dst.lo,$dst.lo\n\t" 9426 "ADC $dst.hi,$dst.hi" %} 9427 ins_encode %{ 9428 __ addl($dst$$Register,$dst$$Register); 9429 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9430 __ addl($dst$$Register,$dst$$Register); 9431 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9432 __ addl($dst$$Register,$dst$$Register); 9433 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9434 %} 9435 ins_pipe( ialu_reg_long ); 9436 %} 9437 9438 // Shift Left 
Long by 1-31 9439 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9440 match(Set dst (LShiftL dst cnt)); 9441 effect(KILL cr); 9442 ins_cost(200); 9443 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9444 "SHL $dst.lo,$cnt" %} 9445 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9446 ins_encode( move_long_small_shift(dst,cnt) ); 9447 ins_pipe( ialu_reg_long ); 9448 %} 9449 9450 // Shift Left Long by 32-63 9451 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9452 match(Set dst (LShiftL dst cnt)); 9453 effect(KILL cr); 9454 ins_cost(300); 9455 format %{ "MOV $dst.hi,$dst.lo\n" 9456 "\tSHL $dst.hi,$cnt-32\n" 9457 "\tXOR $dst.lo,$dst.lo" %} 9458 opcode(0xC1, 0x4); /* C1 /4 ib */ 9459 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9460 ins_pipe( ialu_reg_long ); 9461 %} 9462 9463 // Shift Left Long by variable 9464 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9465 match(Set dst (LShiftL dst shift)); 9466 effect(KILL cr); 9467 ins_cost(500+200); 9468 size(17); 9469 format %{ "TEST $shift,32\n\t" 9470 "JEQ,s small\n\t" 9471 "MOV $dst.hi,$dst.lo\n\t" 9472 "XOR $dst.lo,$dst.lo\n" 9473 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9474 "SHL $dst.lo,$shift" %} 9475 ins_encode( shift_left_long( dst, shift ) ); 9476 ins_pipe( pipe_slow ); 9477 %} 9478 9479 // Shift Right Long by 1-31 9480 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9481 match(Set dst (URShiftL dst cnt)); 9482 effect(KILL cr); 9483 ins_cost(200); 9484 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9485 "SHR $dst.hi,$cnt" %} 9486 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9487 ins_encode( move_long_small_shift(dst,cnt) ); 9488 ins_pipe( ialu_reg_long ); 9489 %} 9490 9491 // Shift Right Long by 32-63 9492 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9493 match(Set dst (URShiftL dst cnt)); 9494 effect(KILL cr); 9495 ins_cost(300); 9496 format %{ "MOV $dst.lo,$dst.hi\n" 9497 "\tSHR $dst.lo,$cnt-32\n" 9498 "\tXOR 
$dst.hi,$dst.hi" %} 9499 opcode(0xC1, 0x5); /* C1 /5 ib */ 9500 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9501 ins_pipe( ialu_reg_long ); 9502 %} 9503 9504 // Shift Right Long by variable 9505 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9506 match(Set dst (URShiftL dst shift)); 9507 effect(KILL cr); 9508 ins_cost(600); 9509 size(17); 9510 format %{ "TEST $shift,32\n\t" 9511 "JEQ,s small\n\t" 9512 "MOV $dst.lo,$dst.hi\n\t" 9513 "XOR $dst.hi,$dst.hi\n" 9514 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9515 "SHR $dst.hi,$shift" %} 9516 ins_encode( shift_right_long( dst, shift ) ); 9517 ins_pipe( pipe_slow ); 9518 %} 9519 9520 // Shift Right Long by 1-31 9521 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9522 match(Set dst (RShiftL dst cnt)); 9523 effect(KILL cr); 9524 ins_cost(200); 9525 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9526 "SAR $dst.hi,$cnt" %} 9527 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9528 ins_encode( move_long_small_shift(dst,cnt) ); 9529 ins_pipe( ialu_reg_long ); 9530 %} 9531 9532 // Shift Right Long by 32-63 9533 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9534 match(Set dst (RShiftL dst cnt)); 9535 effect(KILL cr); 9536 ins_cost(300); 9537 format %{ "MOV $dst.lo,$dst.hi\n" 9538 "\tSAR $dst.lo,$cnt-32\n" 9539 "\tSAR $dst.hi,31" %} 9540 opcode(0xC1, 0x7); /* C1 /7 ib */ 9541 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9542 ins_pipe( ialu_reg_long ); 9543 %} 9544 9545 // Shift Right arithmetic Long by variable 9546 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9547 match(Set dst (RShiftL dst shift)); 9548 effect(KILL cr); 9549 ins_cost(600); 9550 size(18); 9551 format %{ "TEST $shift,32\n\t" 9552 "JEQ,s small\n\t" 9553 "MOV $dst.lo,$dst.hi\n\t" 9554 "SAR $dst.hi,31\n" 9555 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9556 "SAR $dst.hi,$shift" %} 9557 ins_encode( shift_right_arith_long( dst, shift ) ); 9558 ins_pipe( pipe_slow ); 9559 %} 9560 9561 
9562 //----------Double Instructions------------------------------------------------ 9563 // Double Math 9564 9565 // Compare & branch 9566 9567 // P6 version of float compare, sets condition codes in EFLAGS 9568 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9569 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9570 match(Set cr (CmpD src1 src2)); 9571 effect(KILL rax); 9572 ins_cost(150); 9573 format %{ "FLD $src1\n\t" 9574 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9575 "JNP exit\n\t" 9576 "MOV ah,1 // saw a NaN, set CF\n\t" 9577 "SAHF\n" 9578 "exit:\tNOP // avoid branch to branch" %} 9579 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9580 ins_encode( Push_Reg_DPR(src1), 9581 OpcP, RegOpc(src2), 9582 cmpF_P6_fixup ); 9583 ins_pipe( pipe_slow ); 9584 %} 9585 9586 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9587 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9588 match(Set cr (CmpD src1 src2)); 9589 ins_cost(150); 9590 format %{ "FLD $src1\n\t" 9591 "FUCOMIP ST,$src2 // P6 instruction" %} 9592 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9593 ins_encode( Push_Reg_DPR(src1), 9594 OpcP, RegOpc(src2)); 9595 ins_pipe( pipe_slow ); 9596 %} 9597 9598 // Compare & branch 9599 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9600 predicate(UseSSE<=1); 9601 match(Set cr (CmpD src1 src2)); 9602 effect(KILL rax); 9603 ins_cost(200); 9604 format %{ "FLD $src1\n\t" 9605 "FCOMp $src2\n\t" 9606 "FNSTSW AX\n\t" 9607 "TEST AX,0x400\n\t" 9608 "JZ,s flags\n\t" 9609 "MOV AH,1\t# unordered treat as LT\n" 9610 "flags:\tSAHF" %} 9611 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9612 ins_encode( Push_Reg_DPR(src1), 9613 OpcP, RegOpc(src2), 9614 fpu_flags); 9615 ins_pipe( pipe_slow ); 9616 %} 9617 9618 // Compare vs zero into -1,0,1 9619 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9620 predicate(UseSSE<=1); 9621 match(Set dst (CmpD3 src1 zero)); 9622 effect(KILL 
cr, KILL rax); 9623 ins_cost(280); 9624 format %{ "FTSTD $dst,$src1" %} 9625 opcode(0xE4, 0xD9); 9626 ins_encode( Push_Reg_DPR(src1), 9627 OpcS, OpcP, PopFPU, 9628 CmpF_Result(dst)); 9629 ins_pipe( pipe_slow ); 9630 %} 9631 9632 // Compare into -1,0,1 9633 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9634 predicate(UseSSE<=1); 9635 match(Set dst (CmpD3 src1 src2)); 9636 effect(KILL cr, KILL rax); 9637 ins_cost(300); 9638 format %{ "FCMPD $dst,$src1,$src2" %} 9639 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9640 ins_encode( Push_Reg_DPR(src1), 9641 OpcP, RegOpc(src2), 9642 CmpF_Result(dst)); 9643 ins_pipe( pipe_slow ); 9644 %} 9645 9646 // float compare and set condition codes in EFLAGS by XMM regs 9647 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9648 predicate(UseSSE>=2); 9649 match(Set cr (CmpD src1 src2)); 9650 ins_cost(145); 9651 format %{ "UCOMISD $src1,$src2\n\t" 9652 "JNP,s exit\n\t" 9653 "PUSHF\t# saw NaN, set CF\n\t" 9654 "AND [rsp], #0xffffff2b\n\t" 9655 "POPF\n" 9656 "exit:" %} 9657 ins_encode %{ 9658 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9659 emit_cmpfp_fixup(_masm); 9660 %} 9661 ins_pipe( pipe_slow ); 9662 %} 9663 9664 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9665 predicate(UseSSE>=2); 9666 match(Set cr (CmpD src1 src2)); 9667 ins_cost(100); 9668 format %{ "UCOMISD $src1,$src2" %} 9669 ins_encode %{ 9670 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9671 %} 9672 ins_pipe( pipe_slow ); 9673 %} 9674 9675 // float compare and set condition codes in EFLAGS by XMM regs 9676 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9677 predicate(UseSSE>=2); 9678 match(Set cr (CmpD src1 (LoadD src2))); 9679 ins_cost(145); 9680 format %{ "UCOMISD $src1,$src2\n\t" 9681 "JNP,s exit\n\t" 9682 "PUSHF\t# saw NaN, set CF\n\t" 9683 "AND [rsp], #0xffffff2b\n\t" 9684 "POPF\n" 9685 "exit:" %} 9686 ins_encode %{ 9687 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9688 
emit_cmpfp_fixup(_masm); 9689 %} 9690 ins_pipe( pipe_slow ); 9691 %} 9692 9693 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9694 predicate(UseSSE>=2); 9695 match(Set cr (CmpD src1 (LoadD src2))); 9696 ins_cost(100); 9697 format %{ "UCOMISD $src1,$src2" %} 9698 ins_encode %{ 9699 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9700 %} 9701 ins_pipe( pipe_slow ); 9702 %} 9703 9704 // Compare into -1,0,1 in XMM 9705 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9706 predicate(UseSSE>=2); 9707 match(Set dst (CmpD3 src1 src2)); 9708 effect(KILL cr); 9709 ins_cost(255); 9710 format %{ "UCOMISD $src1, $src2\n\t" 9711 "MOV $dst, #-1\n\t" 9712 "JP,s done\n\t" 9713 "JB,s done\n\t" 9714 "SETNE $dst\n\t" 9715 "MOVZB $dst, $dst\n" 9716 "done:" %} 9717 ins_encode %{ 9718 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9719 emit_cmpfp3(_masm, $dst$$Register); 9720 %} 9721 ins_pipe( pipe_slow ); 9722 %} 9723 9724 // Compare into -1,0,1 in XMM and memory 9725 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9726 predicate(UseSSE>=2); 9727 match(Set dst (CmpD3 src1 (LoadD src2))); 9728 effect(KILL cr); 9729 ins_cost(275); 9730 format %{ "UCOMISD $src1, $src2\n\t" 9731 "MOV $dst, #-1\n\t" 9732 "JP,s done\n\t" 9733 "JB,s done\n\t" 9734 "SETNE $dst\n\t" 9735 "MOVZB $dst, $dst\n" 9736 "done:" %} 9737 ins_encode %{ 9738 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9739 emit_cmpfp3(_masm, $dst$$Register); 9740 %} 9741 ins_pipe( pipe_slow ); 9742 %} 9743 9744 9745 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9746 predicate (UseSSE <=1); 9747 match(Set dst (SubD dst src)); 9748 9749 format %{ "FLD $src\n\t" 9750 "DSUBp $dst,ST" %} 9751 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9752 ins_cost(150); 9753 ins_encode( Push_Reg_DPR(src), 9754 OpcP, RegOpc(dst) ); 9755 ins_pipe( fpu_reg_reg ); 9756 %} 9757 9758 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9759 predicate (UseSSE <=1); 9760 
match(Set dst (RoundDouble (SubD src1 src2))); 9761 ins_cost(250); 9762 9763 format %{ "FLD $src2\n\t" 9764 "DSUB ST,$src1\n\t" 9765 "FSTP_D $dst\t# D-round" %} 9766 opcode(0xD8, 0x5); 9767 ins_encode( Push_Reg_DPR(src2), 9768 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9769 ins_pipe( fpu_mem_reg_reg ); 9770 %} 9771 9772 9773 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9774 predicate (UseSSE <=1); 9775 match(Set dst (SubD dst (LoadD src))); 9776 ins_cost(150); 9777 9778 format %{ "FLD $src\n\t" 9779 "DSUBp $dst,ST" %} 9780 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9781 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9782 OpcP, RegOpc(dst) ); 9783 ins_pipe( fpu_reg_mem ); 9784 %} 9785 9786 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9787 predicate (UseSSE<=1); 9788 match(Set dst (AbsD src)); 9789 ins_cost(100); 9790 format %{ "FABS" %} 9791 opcode(0xE1, 0xD9); 9792 ins_encode( OpcS, OpcP ); 9793 ins_pipe( fpu_reg_reg ); 9794 %} 9795 9796 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9797 predicate(UseSSE<=1); 9798 match(Set dst (NegD src)); 9799 ins_cost(100); 9800 format %{ "FCHS" %} 9801 opcode(0xE0, 0xD9); 9802 ins_encode( OpcS, OpcP ); 9803 ins_pipe( fpu_reg_reg ); 9804 %} 9805 9806 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9807 predicate(UseSSE<=1); 9808 match(Set dst (AddD dst src)); 9809 format %{ "FLD $src\n\t" 9810 "DADD $dst,ST" %} 9811 size(4); 9812 ins_cost(150); 9813 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9814 ins_encode( Push_Reg_DPR(src), 9815 OpcP, RegOpc(dst) ); 9816 ins_pipe( fpu_reg_reg ); 9817 %} 9818 9819 9820 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9821 predicate(UseSSE<=1); 9822 match(Set dst (RoundDouble (AddD src1 src2))); 9823 ins_cost(250); 9824 9825 format %{ "FLD $src2\n\t" 9826 "DADD ST,$src1\n\t" 9827 "FSTP_D $dst\t# D-round" %} 9828 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9829 ins_encode( Push_Reg_DPR(src2), 9830 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9831 ins_pipe( 
fpu_mem_reg_reg ); 9832 %} 9833 9834 9835 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9836 predicate(UseSSE<=1); 9837 match(Set dst (AddD dst (LoadD src))); 9838 ins_cost(150); 9839 9840 format %{ "FLD $src\n\t" 9841 "DADDp $dst,ST" %} 9842 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9843 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9844 OpcP, RegOpc(dst) ); 9845 ins_pipe( fpu_reg_mem ); 9846 %} 9847 9848 // add-to-memory 9849 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9850 predicate(UseSSE<=1); 9851 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9852 ins_cost(150); 9853 9854 format %{ "FLD_D $dst\n\t" 9855 "DADD ST,$src\n\t" 9856 "FST_D $dst" %} 9857 opcode(0xDD, 0x0); 9858 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9859 Opcode(0xD8), RegOpc(src), 9860 set_instruction_start, 9861 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9862 ins_pipe( fpu_reg_mem ); 9863 %} 9864 9865 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9866 predicate(UseSSE<=1); 9867 match(Set dst (AddD dst con)); 9868 ins_cost(125); 9869 format %{ "FLD1\n\t" 9870 "DADDp $dst,ST" %} 9871 ins_encode %{ 9872 __ fld1(); 9873 __ faddp($dst$$reg); 9874 %} 9875 ins_pipe(fpu_reg); 9876 %} 9877 9878 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9879 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9880 match(Set dst (AddD dst con)); 9881 ins_cost(200); 9882 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9883 "DADDp $dst,ST" %} 9884 ins_encode %{ 9885 __ fld_d($constantaddress($con)); 9886 __ faddp($dst$$reg); 9887 %} 9888 ins_pipe(fpu_reg_mem); 9889 %} 9890 9891 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9892 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9893 match(Set dst (RoundDouble (AddD src con))); 9894 ins_cost(200); 9895 format %{ "FLD_D [$constantaddress]\t# load from 
constant table: double=$con\n\t" 9896 "DADD ST,$src\n\t" 9897 "FSTP_D $dst\t# D-round" %} 9898 ins_encode %{ 9899 __ fld_d($constantaddress($con)); 9900 __ fadd($src$$reg); 9901 __ fstp_d(Address(rsp, $dst$$disp)); 9902 %} 9903 ins_pipe(fpu_mem_reg_con); 9904 %} 9905 9906 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9907 predicate(UseSSE<=1); 9908 match(Set dst (MulD dst src)); 9909 format %{ "FLD $src\n\t" 9910 "DMULp $dst,ST" %} 9911 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9912 ins_cost(150); 9913 ins_encode( Push_Reg_DPR(src), 9914 OpcP, RegOpc(dst) ); 9915 ins_pipe( fpu_reg_reg ); 9916 %} 9917 9918 // Strict FP instruction biases argument before multiply then 9919 // biases result to avoid double rounding of subnormals. 9920 // 9921 // scale arg1 by multiplying arg1 by 2^(-15360) 9922 // load arg2 9923 // multiply scaled arg1 by arg2 9924 // rescale product by 2^(15360) 9925 // 9926 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9927 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9928 match(Set dst (MulD dst src)); 9929 ins_cost(1); // Select this instruction for all strict FP double multiplies 9930 9931 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9932 "DMULp $dst,ST\n\t" 9933 "FLD $src\n\t" 9934 "DMULp $dst,ST\n\t" 9935 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9936 "DMULp $dst,ST\n\t" %} 9937 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9938 ins_encode( strictfp_bias1(dst), 9939 Push_Reg_DPR(src), 9940 OpcP, RegOpc(dst), 9941 strictfp_bias2(dst) ); 9942 ins_pipe( fpu_reg_reg ); 9943 %} 9944 9945 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9946 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9947 match(Set dst (MulD dst con)); 9948 ins_cost(200); 9949 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9950 "DMULp $dst,ST" %} 9951 ins_encode %{ 9952 __ fld_d($constantaddress($con)); 9953 __ 
fmulp($dst$$reg); 9954 %} 9955 ins_pipe(fpu_reg_mem); 9956 %} 9957 9958 9959 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9960 predicate( UseSSE<=1 ); 9961 match(Set dst (MulD dst (LoadD src))); 9962 ins_cost(200); 9963 format %{ "FLD_D $src\n\t" 9964 "DMULp $dst,ST" %} 9965 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9966 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9967 OpcP, RegOpc(dst) ); 9968 ins_pipe( fpu_reg_mem ); 9969 %} 9970 9971 // 9972 // Cisc-alternate to reg-reg multiply 9973 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 9974 predicate( UseSSE<=1 ); 9975 match(Set dst (MulD src (LoadD mem))); 9976 ins_cost(250); 9977 format %{ "FLD_D $mem\n\t" 9978 "DMUL ST,$src\n\t" 9979 "FSTP_D $dst" %} 9980 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9981 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9982 OpcReg_FPR(src), 9983 Pop_Reg_DPR(dst) ); 9984 ins_pipe( fpu_reg_reg_mem ); 9985 %} 9986 9987 9988 // MACRO3 -- addDPR a mulDPR 9989 // This instruction is a '2-address' instruction in that the result goes 9990 // back to src2. This eliminates a move from the macro; possibly the 9991 // register allocator will have to add it back (and maybe not). 
// Fused multiply-accumulate on the x87 stack: src2 = (src0 * src1) + src2.
// '2-address' form -- the result is written back into src2 (see MACRO3 note above).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );                          // x87 path only (no SSE2 doubles)
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),                  // push multiplicand onto FPU stack
              FMul_ST_reg(src1),                   // ST = src0 * src1
              FAddP_reg_ST(src2) );                // src2 += ST, pop
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
// Computes src2 = (src0 * src1) - src2 (ideal SubD: product minus src2),
// written back into src2.
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );                          // x87 path only
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  // No opcode() line: the DE E0+i bytes (reversed-subtract-and-pop) are
  // emitted directly by the ins_encode below.
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


// Non-strict x87 double divide: dst = dst / src.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),                   // divisor on top of stack
              OpcP, RegOpc(dst) );                 // divide-and-pop into dst
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// Strict-FP x87 double divide: dst = dst / src, with pre/post bias
// multiplies so subnormal results are not double-rounded.
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // FIX(review): the original declared predicate() twice -- a bare
  // "predicate (UseSSE<=1);" followed by this stronger predicate that
  // already contains the UseSSE<=1 term. The duplicate (subsumed) first
  // declaration has been removed; matching behavior is unchanged.
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1); // FIX(review): was ins_cost(01) -- octal-style literal, same
               // value 1; written plainly to match strictfp_mulDPR_reg.
               // Cost 1 forces selection of this form for all strict divides.

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),                 // scale dividend down
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),                   // the divide itself
              strictfp_bias2(dst) );               // rescale quotient
  ins_pipe( fpu_reg_reg );
%}

// Double divide rounded to a stack slot; only for the non-strict case
// (the strict form above claims all strict divides via its cost of 1).
instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD $src1\n\t"
            "FDIV ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Double remainder on the x87 stack (FPREM loop inside emitModDPR).
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 double remainder: XMM operands are spilled through the stack so the
// x87 FPREM loop can compute the remainder, then the result is reloaded.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);                       // FNSTSW AX / SAHF in the FPREM loop

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

// x87 arctangent (FPATAN), two-operand form: dst = atan2-style result of dst,src.
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// SSE2 variant: operands travel through the stack to reach the x87 FPATAN.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// x87 double square root (FSQRT, D9 FA).
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
//     fcompp();
//     fwait(); fnstsw_ax();
//     sahf();
//     movl(dst, unordered_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst,
greater_result); 10161 // exit: 10162 10163 // P6 version of float compare, sets condition codes in EFLAGS 10164 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10165 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10166 match(Set cr (CmpF src1 src2)); 10167 effect(KILL rax); 10168 ins_cost(150); 10169 format %{ "FLD $src1\n\t" 10170 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10171 "JNP exit\n\t" 10172 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10173 "SAHF\n" 10174 "exit:\tNOP // avoid branch to branch" %} 10175 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10176 ins_encode( Push_Reg_DPR(src1), 10177 OpcP, RegOpc(src2), 10178 cmpF_P6_fixup ); 10179 ins_pipe( pipe_slow ); 10180 %} 10181 10182 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10183 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10184 match(Set cr (CmpF src1 src2)); 10185 ins_cost(100); 10186 format %{ "FLD $src1\n\t" 10187 "FUCOMIP ST,$src2 // P6 instruction" %} 10188 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10189 ins_encode( Push_Reg_DPR(src1), 10190 OpcP, RegOpc(src2)); 10191 ins_pipe( pipe_slow ); 10192 %} 10193 10194 10195 // Compare & branch 10196 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10197 predicate(UseSSE == 0); 10198 match(Set cr (CmpF src1 src2)); 10199 effect(KILL rax); 10200 ins_cost(200); 10201 format %{ "FLD $src1\n\t" 10202 "FCOMp $src2\n\t" 10203 "FNSTSW AX\n\t" 10204 "TEST AX,0x400\n\t" 10205 "JZ,s flags\n\t" 10206 "MOV AH,1\t# unordered treat as LT\n" 10207 "flags:\tSAHF" %} 10208 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10209 ins_encode( Push_Reg_DPR(src1), 10210 OpcP, RegOpc(src2), 10211 fpu_flags); 10212 ins_pipe( pipe_slow ); 10213 %} 10214 10215 // Compare vs zero into -1,0,1 10216 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10217 predicate(UseSSE == 0); 10218 match(Set dst (CmpF3 src1 zero)); 10219 effect(KILL cr, KILL rax); 
10220 ins_cost(280); 10221 format %{ "FTSTF $dst,$src1" %} 10222 opcode(0xE4, 0xD9); 10223 ins_encode( Push_Reg_DPR(src1), 10224 OpcS, OpcP, PopFPU, 10225 CmpF_Result(dst)); 10226 ins_pipe( pipe_slow ); 10227 %} 10228 10229 // Compare into -1,0,1 10230 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10231 predicate(UseSSE == 0); 10232 match(Set dst (CmpF3 src1 src2)); 10233 effect(KILL cr, KILL rax); 10234 ins_cost(300); 10235 format %{ "FCMPF $dst,$src1,$src2" %} 10236 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10237 ins_encode( Push_Reg_DPR(src1), 10238 OpcP, RegOpc(src2), 10239 CmpF_Result(dst)); 10240 ins_pipe( pipe_slow ); 10241 %} 10242 10243 // float compare and set condition codes in EFLAGS by XMM regs 10244 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10245 predicate(UseSSE>=1); 10246 match(Set cr (CmpF src1 src2)); 10247 ins_cost(145); 10248 format %{ "UCOMISS $src1,$src2\n\t" 10249 "JNP,s exit\n\t" 10250 "PUSHF\t# saw NaN, set CF\n\t" 10251 "AND [rsp], #0xffffff2b\n\t" 10252 "POPF\n" 10253 "exit:" %} 10254 ins_encode %{ 10255 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10256 emit_cmpfp_fixup(_masm); 10257 %} 10258 ins_pipe( pipe_slow ); 10259 %} 10260 10261 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10262 predicate(UseSSE>=1); 10263 match(Set cr (CmpF src1 src2)); 10264 ins_cost(100); 10265 format %{ "UCOMISS $src1,$src2" %} 10266 ins_encode %{ 10267 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10268 %} 10269 ins_pipe( pipe_slow ); 10270 %} 10271 10272 // float compare and set condition codes in EFLAGS by XMM regs 10273 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10274 predicate(UseSSE>=1); 10275 match(Set cr (CmpF src1 (LoadF src2))); 10276 ins_cost(165); 10277 format %{ "UCOMISS $src1,$src2\n\t" 10278 "JNP,s exit\n\t" 10279 "PUSHF\t# saw NaN, set CF\n\t" 10280 "AND [rsp], #0xffffff2b\n\t" 10281 "POPF\n" 10282 "exit:" %} 10283 ins_encode %{ 10284 __ 
ucomiss($src1$$XMMRegister, $src2$$Address); 10285 emit_cmpfp_fixup(_masm); 10286 %} 10287 ins_pipe( pipe_slow ); 10288 %} 10289 10290 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10291 predicate(UseSSE>=1); 10292 match(Set cr (CmpF src1 (LoadF src2))); 10293 ins_cost(100); 10294 format %{ "UCOMISS $src1,$src2" %} 10295 ins_encode %{ 10296 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10297 %} 10298 ins_pipe( pipe_slow ); 10299 %} 10300 10301 // Compare into -1,0,1 in XMM 10302 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10303 predicate(UseSSE>=1); 10304 match(Set dst (CmpF3 src1 src2)); 10305 effect(KILL cr); 10306 ins_cost(255); 10307 format %{ "UCOMISS $src1, $src2\n\t" 10308 "MOV $dst, #-1\n\t" 10309 "JP,s done\n\t" 10310 "JB,s done\n\t" 10311 "SETNE $dst\n\t" 10312 "MOVZB $dst, $dst\n" 10313 "done:" %} 10314 ins_encode %{ 10315 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10316 emit_cmpfp3(_masm, $dst$$Register); 10317 %} 10318 ins_pipe( pipe_slow ); 10319 %} 10320 10321 // Compare into -1,0,1 in XMM and memory 10322 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10323 predicate(UseSSE>=1); 10324 match(Set dst (CmpF3 src1 (LoadF src2))); 10325 effect(KILL cr); 10326 ins_cost(275); 10327 format %{ "UCOMISS $src1, $src2\n\t" 10328 "MOV $dst, #-1\n\t" 10329 "JP,s done\n\t" 10330 "JB,s done\n\t" 10331 "SETNE $dst\n\t" 10332 "MOVZB $dst, $dst\n" 10333 "done:" %} 10334 ins_encode %{ 10335 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10336 emit_cmpfp3(_masm, $dst$$Register); 10337 %} 10338 ins_pipe( pipe_slow ); 10339 %} 10340 10341 // Spill to obtain 24-bit precision 10342 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10343 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10344 match(Set dst (SubF src1 src2)); 10345 10346 format %{ "FSUB $dst,$src1 - $src2" %} 10347 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10348 
ins_encode( Push_Reg_FPR(src1), 10349 OpcReg_FPR(src2), 10350 Pop_Mem_FPR(dst) ); 10351 ins_pipe( fpu_mem_reg_reg ); 10352 %} 10353 // 10354 // This instruction does not round to 24-bits 10355 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10356 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10357 match(Set dst (SubF dst src)); 10358 10359 format %{ "FSUB $dst,$src" %} 10360 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10361 ins_encode( Push_Reg_FPR(src), 10362 OpcP, RegOpc(dst) ); 10363 ins_pipe( fpu_reg_reg ); 10364 %} 10365 10366 // Spill to obtain 24-bit precision 10367 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10368 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10369 match(Set dst (AddF src1 src2)); 10370 10371 format %{ "FADD $dst,$src1,$src2" %} 10372 opcode(0xD8, 0x0); /* D8 C0+i */ 10373 ins_encode( Push_Reg_FPR(src2), 10374 OpcReg_FPR(src1), 10375 Pop_Mem_FPR(dst) ); 10376 ins_pipe( fpu_mem_reg_reg ); 10377 %} 10378 // 10379 // This instruction does not round to 24-bits 10380 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10381 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10382 match(Set dst (AddF dst src)); 10383 10384 format %{ "FLD $src\n\t" 10385 "FADDp $dst,ST" %} 10386 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10387 ins_encode( Push_Reg_FPR(src), 10388 OpcP, RegOpc(dst) ); 10389 ins_pipe( fpu_reg_reg ); 10390 %} 10391 10392 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10393 predicate(UseSSE==0); 10394 match(Set dst (AbsF src)); 10395 ins_cost(100); 10396 format %{ "FABS" %} 10397 opcode(0xE1, 0xD9); 10398 ins_encode( OpcS, OpcP ); 10399 ins_pipe( fpu_reg_reg ); 10400 %} 10401 10402 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10403 predicate(UseSSE==0); 10404 match(Set dst (NegF src)); 10405 ins_cost(100); 10406 format %{ "FCHS" %} 10407 opcode(0xE0, 0xD9); 10408 ins_encode( OpcS, OpcP ); 10409 ins_pipe( fpu_reg_reg ); 10410 %} 10411 10412 // Cisc-alternate 
to addFPR_reg 10413 // Spill to obtain 24-bit precision 10414 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10415 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10416 match(Set dst (AddF src1 (LoadF src2))); 10417 10418 format %{ "FLD $src2\n\t" 10419 "FADD ST,$src1\n\t" 10420 "FSTP_S $dst" %} 10421 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10422 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10423 OpcReg_FPR(src1), 10424 Pop_Mem_FPR(dst) ); 10425 ins_pipe( fpu_mem_reg_mem ); 10426 %} 10427 // 10428 // Cisc-alternate to addFPR_reg 10429 // This instruction does not round to 24-bits 10430 instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10431 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10432 match(Set dst (AddF dst (LoadF src))); 10433 10434 format %{ "FADD $dst,$src" %} 10435 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10436 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10437 OpcP, RegOpc(dst) ); 10438 ins_pipe( fpu_reg_mem ); 10439 %} 10440 10441 // // Following two instructions for _222_mpegaudio 10442 // Spill to obtain 24-bit precision 10443 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10444 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10445 match(Set dst (AddF src1 src2)); 10446 10447 format %{ "FADD $dst,$src1,$src2" %} 10448 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10449 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10450 OpcReg_FPR(src2), 10451 Pop_Mem_FPR(dst) ); 10452 ins_pipe( fpu_mem_reg_mem ); 10453 %} 10454 10455 // Cisc-spill variant 10456 // Spill to obtain 24-bit precision 10457 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10458 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10459 match(Set dst (AddF src1 (LoadF src2))); 10460 10461 format %{ "FADD $dst,$src1,$src2 cisc" %} 10462 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ 
/* LoadF D9 /0 */ 10463 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10464 set_instruction_start, 10465 OpcP, RMopc_Mem(secondary,src1), 10466 Pop_Mem_FPR(dst) ); 10467 ins_pipe( fpu_mem_mem_mem ); 10468 %} 10469 10470 // Spill to obtain 24-bit precision 10471 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10472 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10473 match(Set dst (AddF src1 src2)); 10474 10475 format %{ "FADD $dst,$src1,$src2" %} 10476 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10477 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10478 set_instruction_start, 10479 OpcP, RMopc_Mem(secondary,src1), 10480 Pop_Mem_FPR(dst) ); 10481 ins_pipe( fpu_mem_mem_mem ); 10482 %} 10483 10484 10485 // Spill to obtain 24-bit precision 10486 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10487 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10488 match(Set dst (AddF src con)); 10489 format %{ "FLD $src\n\t" 10490 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10491 "FSTP_S $dst" %} 10492 ins_encode %{ 10493 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10494 __ fadd_s($constantaddress($con)); 10495 __ fstp_s(Address(rsp, $dst$$disp)); 10496 %} 10497 ins_pipe(fpu_mem_reg_con); 10498 %} 10499 // 10500 // This instruction does not round to 24-bits 10501 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10502 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10503 match(Set dst (AddF src con)); 10504 format %{ "FLD $src\n\t" 10505 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10506 "FSTP $dst" %} 10507 ins_encode %{ 10508 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10509 __ fadd_s($constantaddress($con)); 10510 __ fstp_d($dst$$reg); 10511 %} 10512 ins_pipe(fpu_reg_reg_con); 10513 %} 10514 10515 // Spill to obtain 24-bit precision 10516 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR 
src2) %{ 10517 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10518 match(Set dst (MulF src1 src2)); 10519 10520 format %{ "FLD $src1\n\t" 10521 "FMUL $src2\n\t" 10522 "FSTP_S $dst" %} 10523 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10524 ins_encode( Push_Reg_FPR(src1), 10525 OpcReg_FPR(src2), 10526 Pop_Mem_FPR(dst) ); 10527 ins_pipe( fpu_mem_reg_reg ); 10528 %} 10529 // 10530 // This instruction does not round to 24-bits 10531 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10532 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10533 match(Set dst (MulF src1 src2)); 10534 10535 format %{ "FLD $src1\n\t" 10536 "FMUL $src2\n\t" 10537 "FSTP_S $dst" %} 10538 opcode(0xD8, 0x1); /* D8 C8+i */ 10539 ins_encode( Push_Reg_FPR(src2), 10540 OpcReg_FPR(src1), 10541 Pop_Reg_FPR(dst) ); 10542 ins_pipe( fpu_reg_reg_reg ); 10543 %} 10544 10545 10546 // Spill to obtain 24-bit precision 10547 // Cisc-alternate to reg-reg multiply 10548 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10549 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10550 match(Set dst (MulF src1 (LoadF src2))); 10551 10552 format %{ "FLD_S $src2\n\t" 10553 "FMUL $src1\n\t" 10554 "FSTP_S $dst" %} 10555 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10556 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10557 OpcReg_FPR(src1), 10558 Pop_Mem_FPR(dst) ); 10559 ins_pipe( fpu_mem_reg_mem ); 10560 %} 10561 // 10562 // This instruction does not round to 24-bits 10563 // Cisc-alternate to reg-reg multiply 10564 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10565 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10566 match(Set dst (MulF src1 (LoadF src2))); 10567 10568 format %{ "FMUL $dst,$src1,$src2" %} 10569 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10570 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10571 OpcReg_FPR(src1), 10572 
Pop_Reg_FPR(dst) ); 10573 ins_pipe( fpu_reg_reg_mem ); 10574 %} 10575 10576 // Spill to obtain 24-bit precision 10577 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10578 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10579 match(Set dst (MulF src1 src2)); 10580 10581 format %{ "FMUL $dst,$src1,$src2" %} 10582 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10583 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10584 set_instruction_start, 10585 OpcP, RMopc_Mem(secondary,src1), 10586 Pop_Mem_FPR(dst) ); 10587 ins_pipe( fpu_mem_mem_mem ); 10588 %} 10589 10590 // Spill to obtain 24-bit precision 10591 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10592 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10593 match(Set dst (MulF src con)); 10594 10595 format %{ "FLD $src\n\t" 10596 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10597 "FSTP_S $dst" %} 10598 ins_encode %{ 10599 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10600 __ fmul_s($constantaddress($con)); 10601 __ fstp_s(Address(rsp, $dst$$disp)); 10602 %} 10603 ins_pipe(fpu_mem_reg_con); 10604 %} 10605 // 10606 // This instruction does not round to 24-bits 10607 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10608 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10609 match(Set dst (MulF src con)); 10610 10611 format %{ "FLD $src\n\t" 10612 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10613 "FSTP $dst" %} 10614 ins_encode %{ 10615 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10616 __ fmul_s($constantaddress($con)); 10617 __ fstp_d($dst$$reg); 10618 %} 10619 ins_pipe(fpu_reg_reg_con); 10620 %} 10621 10622 10623 // 10624 // MACRO1 -- subsume unshared load into mulFPR 10625 // This instruction does not round to 24-bits 10626 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10627 predicate(UseSSE==0 && 
!Compile::current()->select_24_bit_instr()); 10628 match(Set dst (MulF (LoadF mem1) src)); 10629 10630 format %{ "FLD $mem1 ===MACRO1===\n\t" 10631 "FMUL ST,$src\n\t" 10632 "FSTP $dst" %} 10633 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10634 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10635 OpcReg_FPR(src), 10636 Pop_Reg_FPR(dst) ); 10637 ins_pipe( fpu_reg_reg_mem ); 10638 %} 10639 // 10640 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10641 // This instruction does not round to 24-bits 10642 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10643 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10644 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10645 ins_cost(95); 10646 10647 format %{ "FLD $mem1 ===MACRO2===\n\t" 10648 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10649 "FADD ST,$src2\n\t" 10650 "FSTP $dst" %} 10651 opcode(0xD9); /* LoadF D9 /0 */ 10652 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10653 FMul_ST_reg(src1), 10654 FAdd_ST_reg(src2), 10655 Pop_Reg_FPR(dst) ); 10656 ins_pipe( fpu_reg_mem_reg_reg ); 10657 %} 10658 10659 // MACRO3 -- addFPR a mulFPR 10660 // This instruction does not round to 24-bits. It is a '2-address' 10661 // instruction in that the result goes back to src2. This eliminates 10662 // a move from the macro; possibly the register allocator will have 10663 // to add it back (and maybe not). 
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: like modD_reg, round-trips through the x87 stack
// because FPREM has no SSE equivalent.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted.  Please keep it that way!

// Round a float by storing it to a stack slot (the store truncates the
// extended x87 precision to 24-bit float).
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round a double by storing it to a stack slot (truncates to 64-bit double).
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // FPR1 can be stored non-destructively with FST; any other x87 slot
    // must first be loaded to the top of stack and stored with FSTP.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Float-to-double widening on the x87 stack (no rounding required).
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// Float-to-double when doubles are in x87 but floats use SSE (UseSSE==1):
// result lands in a stack slot.
instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 float-to-double conversion.
instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSD2SI yields 0x80000000 on overflow/NaN; only then take the
    // slow path through the runtime wrapper, which implements the exact
    // Java corner-case semantics.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert an x87 double to a long (result in EDX:EAX).
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // FIST stores min_jlong (0x8000000000000000) on overflow/NaN; only
    // then call the runtime wrapper for exact Java semantics.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSS2SI yields 0x80000000 on overflow/NaN; only then take the
    // slow path through the d2i runtime wrapper.
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert an x87 float to a long (result in EDX:EAX).
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  // SSE float->long: no XMM instruction exists on ia32, so spill the float to
  // the stack, convert on the x87 stack with truncating FISTP, and fall back
  // to the d2l_wrapper stub when the result is the overflow/NaN flag value.
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // EDX:EAX == 0x8000000000000000 signals overflow/NaN -> take the stub path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Int to double conversion on the x87 stack (no SSE2).
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Int to double via CVTSI2SD (SSE2, XmmI2D disabled).
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand form: fold the LoadI into the conversion.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Int to double staying entirely in XMM (MOVD + CVTDQ2PD) when UseXmmI2D.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// x87 int-load form (FILD from memory); only when not in 24-bit precision mode.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  // Matches only (ConvI2F (AndI x 255)): a byte-range value fits exactly in a
  // float, so the 24-bit rounding store can be skipped.
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Int to float staying entirely in XMM (MOVD + CVTDQ2PS) when UseXmmI2F.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy into both halves, arithmetic-shift the high
// half right by 31 to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long to double via x87 FILD of the two pushed halves (no SSE2).
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Long to double: convert on the x87 stack, then move the result into XMM.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Long to float: convert on the x87 stack, then move the result into XMM.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Long to float on the x87 stack; the FSTP_S store performs the F-round.
// NOTE(review): no predicate here — presumably the allocator/chain rules pick
// this only in the UseSSE==0 configuration; confirm against the matcher.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long to int: just take the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// MoveF2I: reinterpret float bits as int — plain 32-bit load from the slot.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// x87 register -> int stack slot (single-precision store).
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// XMM register -> int stack slot via MOVSS.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// XMM register -> integer register directly via MOVD (cheapest form).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// MoveI2F: reinterpret int bits as float — plain 32-bit store to the slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// Int stack slot -> x87 register (single-precision load).
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);  /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Int stack slot -> XMM register via MOVSS.
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Integer register -> XMM register directly via MOVD.
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// MoveD2L: load the two 32-bit halves of the stack slot into the long pair.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// x87 register -> long stack slot (double-precision store).
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// XMM register -> long stack slot via MOVSD.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// XMM register -> long register pair: MOVD the low word, shuffle the high
// word down with PSHUFLW (0x4E swaps the 32-bit halves), MOVD it out.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// MoveL2D: store the long register pair into the double stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// Long stack slot -> x87 register (double-precision load).
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);  /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// Long stack slot -> XMM via MOVSD, which also clears the upper half.
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Variant used when MOVSD's upper-clearing load is not preferred.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Long register pair -> XMM: MOVD each half, interleave with PUNPCKLDQ.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
// Small-array variant; clear_mem(..., false) selects the non-large path.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}

// Large-array variant; clear_mem(..., true) selects the large path.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, byte[]/byte[] (Latin-1 on both sides).
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, char[]/char[] (UTF-16 on both sides).
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, Latin-1 vs UTF-16.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, UTF-16 vs Latin-1: note the operand registers are swapped
// relative to the LU case and the arguments are passed swapped to
// string_compare, which handles the UL encoding.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // arrays_equals with is_array_equ=false compares two ranges of the
    // same length; last flag false => byte elements.
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}

  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    // Threshold is 8 (not 16) here: UTF-16 elements are 2 bytes wide.
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length IndexOf, byte[] haystack/needle; (-1) means the needle
// length is not a compile-time constant.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length IndexOf, char[] haystack/needle.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length IndexOf, UTF-16 haystack with Latin-1 needle.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf of a single char in a char[] haystack.
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // is_array_equ=true: lengths are read from the array headers.
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals, char[] variant (last argument true => char elements).
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// Test a byte[] range for any negative byte (used by String coders).
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
ins_encode( OpcP, RegReg( op1, op2) ); 12039 ins_pipe( ialu_cr_reg_reg ); 12040 %} 12041 12042 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 12043 match(Set cr (CmpI op1 op2)); 12044 effect( DEF cr, USE op1 ); 12045 format %{ "CMP $op1,$op2" %} 12046 opcode(0x81,0x07); /* Opcode 81 /7 */ 12047 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 12048 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 12049 ins_pipe( ialu_cr_reg_imm ); 12050 %} 12051 12052 // Cisc-spilled version of cmpI_eReg 12053 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 12054 match(Set cr (CmpI op1 (LoadI op2))); 12055 12056 format %{ "CMP $op1,$op2" %} 12057 ins_cost(500); 12058 opcode(0x3B); /* Opcode 3B /r */ 12059 ins_encode( OpcP, RegMem( op1, op2) ); 12060 ins_pipe( ialu_cr_reg_mem ); 12061 %} 12062 12063 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{ 12064 match(Set cr (CmpI src zero)); 12065 effect( DEF cr, USE src ); 12066 12067 format %{ "TEST $src,$src" %} 12068 opcode(0x85); 12069 ins_encode( OpcP, RegReg( src, src ) ); 12070 ins_pipe( ialu_cr_reg_imm ); 12071 %} 12072 12073 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{ 12074 match(Set cr (CmpI (AndI src con) zero)); 12075 12076 format %{ "TEST $src,$con" %} 12077 opcode(0xF7,0x00); 12078 ins_encode( OpcP, RegOpc(src), Con32(con) ); 12079 ins_pipe( ialu_cr_reg_imm ); 12080 %} 12081 12082 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{ 12083 match(Set cr (CmpI (AndI src mem) zero)); 12084 12085 format %{ "TEST $src,$mem" %} 12086 opcode(0x85); 12087 ins_encode( OpcP, RegMem( src, mem ) ); 12088 ins_pipe( ialu_cr_reg_mem ); 12089 %} 12090 12091 // Unsigned compare Instructions; really, same as signed except they 12092 // produce an eFlagsRegU instead of eFlagsReg. 
// Register-register unsigned compare; identical encoding to the signed
// version, but produces eFlagsRegU so only unsigned branches consume it.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Register-immediate unsigned compare.
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare against zero via TEST reg,reg.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Pointer compare against a pointer constant.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  // Restrict to non-relocatable (raw) pointers only.
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// Null-check a pointer directly in memory: TEST mem,0xFFFFFFFF sets
// ZF iff the loaded pointer is zero, without occupying a register.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
// Note: opcode(0xCC) is a placeholder; the real bytes come from min_enc.
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
// Note: opcode(0xCC) is a placeholder; the real bytes come from max_enc.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
// Computes limit = init + stride * ((limit - init + stride - 1) / stride)
// in 64-bit precision using the EAX:EDX pair, then truncates to 32 bits.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    // strides of +/-1 are strength-reduced elsewhere; division needs |strd| > 1
    assert(strd != 1 && strd != -1, "sanity");
    int m1 = (strd > 0) ? 1 : -1;  // NOTE(review): m1 appears unused below -- candidate for removal
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp) -- tmp receives the sign extension
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init  (64-bit subtract across the register pair)
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)  -- rounds the division toward the iteration direction
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      // negative stride: negate the 64-bit value so the divisor is positive
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);  // 1-byte opcode + 4-byte displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false);  // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);  // 2-byte 0F 8x opcode + 4-byte displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false);  // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false);  // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false);  // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false);  // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Matched when the loop body set a vector mask; restore it on loop exit.
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);  // 6-byte jcc + restorevectmask
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false);  // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false);  // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false);  // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false);  // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false);  // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Float-compare branch that must also account for the parity flag
// (unordered result): eq/ne tests need an extra JP to be correct.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // ne: branch if unordered (parity) OR not-equal
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // eq: skip the equal branch when unordered
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
// Linear scan of the secondary-supers array; result is EDI (zero on hit).
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1);  // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Same scan, but matched when the result is only compared against zero:
// the flags carry the answer, so the XOR of EDI can be skipped.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0);  // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);  // 1-byte opcode + 1-byte displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short-branch form of the parity-aware float eq/ne branch (see jmpConUCF2).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);  // two 2-byte short jumps
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in dst by comparing high halves signed, then
// low halves unsigned (standard two-word signed compare).
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // high halves: signed compare
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // low halves: unsigned compare
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Against zero only the sign of the high word matters, so TEST it.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Register-register signed long compare for LT/GE: CMP of the low words sets
// the borrow, then MOV/SBB of the high words folds it in; the resulting sign
// and overflow flags are valid for signed <  and >= tests only.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );     // scratch for the SBB of the high words
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Unsigned variant of cmpL_zero_flags_LTGE above.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Same CMP/MOV/SBB sequence as the signed form; the flags register class
// (flagsReg_ulong_LTGE) restricts consumers to unsigned conditions.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Two CMOVcc instructions (0x0F 0x40+cc), one per 32-bit half.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source form: conditional loads of both halves of the long.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Integer CMOVcc selected by a long LT/GE compare (flags produced by the
// cmpL/cmpUL *_LTGE rules above). Requires CMOV hardware support.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above but the int source comes from memory (conditional load).
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Pointer CMOVcc selected by a long LT/GE compare.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// FIX: parenthesize the BoolTest disjunction. '&&' binds tighter than '||',
// so the original "UseSSE<=1 && lt || ge" parsed as "(UseSSE<=1 && lt) || ge",
// leaving the second test unguarded by the UseSSE check. The sibling rules
// guarded by VM_Version::supports_cmov() already parenthesize explicitly.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);   // x87 conditional double move
  %}
%}

// Compare 2 longs and CMOVE doubles
// SSE2 form; predicate parenthesized as above so UseSSE>=2 guards both tests.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// x87 float CMove; predicate parenthesized so UseSSE==0 guards both tests.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// SSE float CMove; predicate parenthesized so UseSSE>=1 guards both tests.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Long == 0 / != 0: OR the two halves together; ZF is set iff both are zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compare low words; only if they are equal does the high-word compare run
// (short-circuit branch over the second CMP).
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Unsigned long == 0 / != 0: OR the halves; ZF set iff the whole long is zero.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);      // scratch so src is not clobbered
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Low-word CMP with a short branch over the high-word CMP when unequal.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Long CMOVcc selected by a long EQ/NE compare: two CMOVs, one per half.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source form: conditional loads of both halves.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Integer CMOVcc selected by a long EQ/NE compare.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above but the int source comes from memory (conditional load).
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Pointer CMOVcc selected by a long EQ/NE compare.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// FIX: parenthesize the BoolTest disjunction. '&&' binds tighter than '||',
// so "UseSSE<=1 && eq || ne" parsed as "(UseSSE<=1 && eq) || ne", leaving the
// 'ne' test unguarded by the UseSSE check. Sibling supports_cmov() rules
// already parenthesize explicitly.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);   // x87 conditional double move
  %}
%}

// Compare 2 longs and CMOVE doubles
// SSE2 form; predicate parenthesized as above.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// x87 float CMove; predicate parenthesized so UseSSE==0 guards both tests.
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// SSE float CMove; predicate parenthesized so UseSSE>=1 guards both tests.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
// Compare against zero for a commuted (0 cmp src) test: 0 - src via CMP/SBB.
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
// (cmpOp_commute swaps the condition to match the swapped-operand flags).
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
// (cmpOpU_commute swaps the condition to match the swapped-operand flags).
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source form: conditional loads of both halves.
// NOTE(review): this format prints "$src.hi+4" while the sibling LTGE/EQNE
// mem rules print "$src.hi" — debug-format text only, encoding is identical.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Integer CMOVcc selected by a long LE/GT compare (commuted-operand flags).
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above but the int source comes from memory (conditional load).
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Pointer CMOVcc selected by a long LE/GT compare.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// FIX: parenthesize the BoolTest disjunction. '&&' binds tighter than '||',
// so "UseSSE<=1 && le || gt" parsed as "(UseSSE<=1 && le) || gt", leaving the
// 'gt' test unguarded by the UseSSE check. Sibling supports_cmov() rules
// already parenthesize explicitly.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);   // x87 conditional double move
  %}
%}

// Compare 2 longs and CMOVE doubles
// SSE2 form; predicate parenthesized as above.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// x87 float CMove; predicate parenthesized so UseSSE==0 guards both tests.
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// SSE float CMove; predicate parenthesized so UseSSE>=1 guards both tests.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
// Dynamic (inline-cache) Java call: EAX is preloaded with a sentinel oop (-1)
// per the format string before the CALL is emitted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that does not touch the FPU state (no FFREE / FPU verify).
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);   // near return
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
// Indirect JMP (FF /4) through jump_target; EBX carries the method oop.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
// POP into EDX discards the return address; EAX carries the exception oop.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);    // zero-length instruction: just fixes the oop's register
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
// Jump (not call) to the rethrow stub; the exception oop is already in place.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast lock with RTM (Restricted Transactional Memory) support enabled.
// box is USE_KILL'd; tmp/scr/cx1/cx2 are scratch registers for fast_lock().
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Fast lock for the non-RTM case; fewer scratch registers, no RTM counters.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

// Fast unlock; box must be in EAX, tmp is scratch.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
// Thread-local safepoint poll: TEST EAX against the poll page held in a
// register; the load faults when the page is armed. The size(2) assert and
// the guarantee on the 0x85 opcode byte pin the exact encoding, which the
// signal handler relies on to recognize a poll instruction.
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();   // NOTE(review): post_pc is captured but unused here
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active rule: a load of the value just stored to the same address is
// redundant; re-emit only the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.