//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
// NOTE(review): assumes the allocator numbers each long pair two OptoReg
// slots apart (witness the L/H pairs in the reg_def list above) — confirm
// if the register layout ever changes.
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool entry is carved 16-byte-aligned out of fp_signmask_pool by
// double_quadword(); the extra 128 bits in the pool absorb the alignment slack.
static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted *before* a call instruction to reset FPU/AVX
// state; must stay in sync with the sizes of the fldcw and vzeroupper
// encodings counted below.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All stub call sequence; set when it is first
// emitted (hence the -1 sentinel checked below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1; // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5; // skip MOV instruction
  current_offset += 1; // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit one ModR/M (or SIB) byte: f1 = mod (2 bits), f2 = reg/opcode
// extension (3 bits), f3 = r/m (3 bits).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition code OR'd into its low nibble.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  // Embedded oop immediates must not be scavengable (GC could move them
  // out from under the code) unless ScavengeRootsInCode is on.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + [ESP+disp] addressing, choosing the 8-bit displacement
// form when disp fits in a signed byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);   // SIB byte
    emit_d8 (cbuf, disp);    // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);   // SIB byte
    emit_d32(cbuf, disp);    // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModR/M (and, if needed, SIB) bytes plus displacement for a
// register/memory operand. index == 0x4 means "no index register"
// (0x4 is ESP, which cannot be an index); base == -1 means an absolute
// 32-bit address.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) { // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}


// Emit a 32-bit register-to-register move (MOV r32, r/m32, opcode 0x8B);
// a move onto itself emits nothing.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Rewrite the flags produced by a comiss/ucomiss compare so that an
// unordered (NaN) result reads as 'less than'. Skipped entirely when
// PF is clear (ordered compare).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C| (r - reserved bit)
  //    0 0 1 0 1 0 1 1 (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way compare result in dst: -1 (below or unordered),
// 0 (equal), 1 (above), from flags set by a preceding FP compare.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0; // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the method prolog for -XX:+PrintOptoAssembly; must mirror
// the sequence emitted by MachPrologNode::emit / verified_entry below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  // verified_entry emits the whole prolog (stack bang, frame push, fldcw).
  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the method epilog; must mirror MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize (imm32 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize (imm8 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // popl EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Safepoint poll: TEST [polling_page], EAX with a poll_return reloc.
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

// Worst-case byte size of the epilog; the byte counts here must stay in
// sync with the encodings emitted above.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  size += 64; // added to support ReservedStackAccess
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Register class of a spill-copy endpoint, used to pick the move encoding.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (or just size/format) a reg<->[ESP+offset] move for integer/x87
// spills. With cbuf == NULL and do_size == false this only prints the
// disassembly; the return value is always the accumulated byte size.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
// Emit (or just size/format) an XMM<->stack spill move; reg_lo+1 == reg_hi
// distinguishes a 64-bit (double) move from a 32-bit (float) one.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting: EVEX can compress the displacement to one byte even
  // when it exceeds 127, so query the assembler instead of the plain
  // imm8/imm32 rule used for pre-AVX-512 encodings.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// Emit (or just size/format) an XMM-to-XMM register move for spill copies;
// src_lo+1 == src_hi && dst_lo+1 == dst_hi selects the 64-bit (double) form.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Emit (or just format) a 32-bit GPR -> XMM move (movd) for spill copies;
// returns the encoded size in bytes.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// Emit (or just format) a 32-bit XMM -> GPR move (movd) for spill copies.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ?
6 : 4; 919 } 920 921 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) { 922 if( cbuf ) { 923 emit_opcode(*cbuf, 0x8B ); 924 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] ); 925 #ifndef PRODUCT 926 } else if( !do_size ) { 927 if( size != 0 ) st->print("\n\t"); 928 st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]); 929 #endif 930 } 931 return size+2; 932 } 933 934 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi, 935 int offset, int size, outputStream* st ) { 936 if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there 937 if( cbuf ) { 938 emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it) 939 emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] ); 940 #ifndef PRODUCT 941 } else if( !do_size ) { 942 if( size != 0 ) st->print("\n\t"); 943 st->print("FLD %s",Matcher::regName[src_lo]); 944 #endif 945 } 946 size += 2; 947 } 948 949 int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/; 950 const char *op_str; 951 int op; 952 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store? 953 op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D "; 954 op = 0xDD; 955 } else { // 32-bit store 956 op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S "; 957 op = 0xD9; 958 assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" ); 959 } 960 961 return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st); 962 } 963 964 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. 
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);

// Copy a vector value between two stack slots.  VecS/VecD go through
// push/pop pairs; VecX/VecY/VecZ bounce through xmm0, parking xmm0's old
// value in a scratch slot below rsp.  Returns the size in bytes of the
// emitted code (asserted against the actual emission below).
static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
                                     int dst_offset, uint ireg, outputStream* st) {
  // Pre-compute the expected encoding size (displacements < 0x80 fit in one byte).
  int calc_size = 0;
  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
  switch (ireg) {
  case Op_VecS: // 32 bits: one push/pop pair
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    break;
  case Op_VecD: { // 64 bits: two push/pop pairs; second pair uses offset+4
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    int tmp_src_offset = src_offset + 4;
    int tmp_dst_offset = dst_offset + 4;
    src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
    dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
    calc_size += 3+src_offset_size + 3+dst_offset_size;
    break;
  }
  case Op_VecX:
  case Op_VecY:
  case Op_VecZ:
    // save/restore of xmm0 (6+6) plus the two offset-addressed moves
    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
    break;
  default:
    ShouldNotReachHere();
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX:
      __ movdqu(Address(rsp, -16), xmm0);  // park xmm0 below rsp
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16)); // restore xmm0
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
    assert(size == calc_size, "incorrect size calculation");
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  return calc_size;
}

// Workhorse for spill copies.  Serves three callers below:
//   cbuf != NULL          -> emit the move into the code buffer
//   cbuf == NULL, !do_size -> pretty-print the move to st (debug format)
//   cbuf == NULL, do_size  -> only compute and return the encoding size
// Returns the cumulative size in bytes of the code emitted/described.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // Vector spill copies are fully delegated to the vec_* helpers.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high half first so it is not clobbered by the low-half copy.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Materialize the address of the lock's stack slot: LEA reg,[ESP+offset].
// Uses a 32-bit displacement when offset >= 128, else an 8-bit one;
// size() below must agree with this choice.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // 7 bytes with a 32-bit displacement, 4 with an 8-bit one (matches emit()).
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: inline cache check (expected klass in EAX,
// receiver in ECX), jump to the IC miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  // Must match the code emitted by emit() above (asserted there).
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}

// True just means we have fast l2f conversion.
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On 86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 1; }

// No CMOVF/CMOVD with SSE/SSE2
const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
1443 const bool Matcher::need_masked_shift_count = false; 1444 1445 bool Matcher::narrow_oop_use_complex_address() { 1446 ShouldNotCallThis(); 1447 return true; 1448 } 1449 1450 bool Matcher::narrow_klass_use_complex_address() { 1451 ShouldNotCallThis(); 1452 return true; 1453 } 1454 1455 bool Matcher::const_oop_prefer_decode() { 1456 ShouldNotCallThis(); 1457 return true; 1458 } 1459 1460 bool Matcher::const_klass_prefer_decode() { 1461 ShouldNotCallThis(); 1462 return true; 1463 } 1464 1465 // Is it better to copy float constants, or load them directly from memory? 1466 // Intel can load a float constant from a direct address, requiring no 1467 // extra registers. Most RISCs will have to materialize an address into a 1468 // register first, so they would do better to copy the constant from stack. 1469 const bool Matcher::rematerialize_float_constants = true; 1470 1471 // If CPU can load and store mis-aligned doubles directly then no fixup is 1472 // needed. Else we split the double into 2 integer pieces and move it 1473 // piece-by-piece. Only happens when passing doubles into C code as the 1474 // Java calling convention forces doubles to be aligned. 
1475 const bool Matcher::misaligned_doubles_ok = true; 1476 1477 1478 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { 1479 // Get the memory operand from the node 1480 uint numopnds = node->num_opnds(); // Virtual call for number of operands 1481 uint skipped = node->oper_input_base(); // Sum of leaves skipped so far 1482 assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" ); 1483 uint opcnt = 1; // First operand 1484 uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand 1485 while( idx >= skipped+num_edges ) { 1486 skipped += num_edges; 1487 opcnt++; // Bump operand count 1488 assert( opcnt < numopnds, "Accessing non-existent operand" ); 1489 num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand 1490 } 1491 1492 MachOper *memory = node->_opnds[opcnt]; 1493 MachOper *new_memory = NULL; 1494 switch (memory->opcode()) { 1495 case DIRECT: 1496 case INDOFFSET32X: 1497 // No transformation necessary. 1498 return; 1499 case INDIRECT: 1500 new_memory = new indirect_win95_safeOper( ); 1501 break; 1502 case INDOFFSET8: 1503 new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0)); 1504 break; 1505 case INDOFFSET32: 1506 new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0)); 1507 break; 1508 case INDINDEXOFFSET: 1509 new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0)); 1510 break; 1511 case INDINDEXSCALE: 1512 new_memory = new indIndexScale_win95_safeOper(memory->scale()); 1513 break; 1514 case INDINDEXSCALEOFFSET: 1515 new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0)); 1516 break; 1517 case LOAD_LONG_INDIRECT: 1518 case LOAD_LONG_INDOFFSET32: 1519 // Does not use EBP as address register, use { EDX, EBX, EDI, ESI} 1520 return; 1521 default: 1522 assert(false, "unexpected memory operand in pd_implicit_null_fixup()"); 1523 return; 1524 } 1525 node->_opnds[opcnt] = new_memory; 1526 } 1527 1528 // Advertise 
here if the CPU requires explicit rounding operations 1529 // to implement the UseStrictFP mode. 1530 const bool Matcher::strict_fp_requires_explicit_rounding = true; 1531 1532 // Are floats conerted to double when stored to stack during deoptimization? 1533 // On x32 it is stored with convertion only when FPU is used for floats. 1534 bool Matcher::float_in_double() { return (UseSSE == 0); } 1535 1536 // Do ints take an entire long register or just half? 1537 const bool Matcher::int_in_long = false; 1538 1539 // Return whether or not this register is ever used as an argument. This 1540 // function is used on startup to build the trampoline stubs in generateOptoStub. 1541 // Registers not mentioned will be killed by the VM call in the trampoline, and 1542 // arguments in those registers not be available to the callee. 1543 bool Matcher::can_be_java_arg( int reg ) { 1544 if( reg == ECX_num || reg == EDX_num ) return true; 1545 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; 1546 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1547 return false; 1548 } 1549 1550 bool Matcher::is_spillable_arg( int reg ) { 1551 return can_be_java_arg(reg); 1552 } 1553 1554 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { 1555 // Use hardware integer DIV instruction when 1556 // it is faster than a code which use multiply. 1557 // Only when constant divisor fits into 32 bit 1558 // (min_jint is excluded to get only correct 1559 // positive 32 bit values from negative). 
1560 return VM_Version::has_fast_idiv() && 1561 (divisor == (int)divisor && divisor != min_jint); 1562 } 1563 1564 // Register for DIVI projection of divmodI 1565 RegMask Matcher::divI_proj_mask() { 1566 return EAX_REG_mask(); 1567 } 1568 1569 // Register for MODI projection of divmodI 1570 RegMask Matcher::modI_proj_mask() { 1571 return EDX_REG_mask(); 1572 } 1573 1574 // Register for DIVL projection of divmodL 1575 RegMask Matcher::divL_proj_mask() { 1576 ShouldNotReachHere(); 1577 return RegMask(); 1578 } 1579 1580 // Register for MODL projection of divmodL 1581 RegMask Matcher::modL_proj_mask() { 1582 ShouldNotReachHere(); 1583 return RegMask(); 1584 } 1585 1586 const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1587 return NO_REG_mask(); 1588 } 1589 1590 // Returns true if the high 32 bits of the value is known to be zero. 1591 bool is_operand_hi32_zero(Node* n) { 1592 int opc = n->Opcode(); 1593 if (opc == Op_AndL) { 1594 Node* o2 = n->in(2); 1595 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1596 return true; 1597 } 1598 } 1599 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1600 return true; 1601 } 1602 return false; 1603 } 1604 1605 %} 1606 1607 //----------ENCODING BLOCK----------------------------------------------------- 1608 // This block specifies the encoding classes used by the compiler to output 1609 // byte streams. Encoding classes generate functions which are called by 1610 // Machine Instruction Nodes in order to generate the bit encoding of the 1611 // instruction. Operands specify their base encoding interface with the 1612 // interface keyword. There are currently supported four interfaces, 1613 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1614 // operand to generate a function which returns its register number when 1615 // queried. CONST_INTER causes an operand to generate a function which 1616 // returns the value of the constant when queried. 
MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly (opcode byte supplied as an immediate operand)
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix (selects 16-bit operand size).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // Emit a mod=11 (register-register) ModRM byte for dst,src.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Emit an opcode byte (from operand) followed by a reg-reg ModRM byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Load a 32-bit zero into a register: MOV r32, imm32 with imm32 == 0.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    //  input : rax,: dividend                         min_int
    //          reg:  divisor                          -1
    //
    //  output: rax,: quotient  (= rax, idiv reg)      min_int
    //          rdx: remainder (= rax, irem reg)       0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);  // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);  // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                      // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops: opcode byte carries the register
  // number in its low bits (e.g. PUSH/POP/MOV-imm short forms).
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // As OpcSErm, but operates on the low 32 bits of a long immediate.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // As OpcSErm, but operates on the high 32 bits of a long immediate,
  // targeting the high register of the pair and using the tertiary opcode.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit register pair: bswap each half, then exchange halves.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // FCMOV: x87 conditional move keyed by the comparison code.
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    // $primary selects whether a zeroed result is materialized on success.
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    // Record (first time) or verify (afterwards) the emitted size, so the
    // instruction's size() stays consistent across emissions.
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // Move the x87 float result to xmm0 via a stack temp.
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        // Move the x87 double result to xmm0 via a stack temp.
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}

  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if (ra_->C->in_24_bit_fp_mode()) {
      MacroAssembler _masm(&cbuf);
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    MacroAssembler _masm(&cbuf);
    __ vzeroupper();
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;

    if (!_method) {
      // Not a Java method: plain runtime call.
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_IMM32);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     rspec, RELOC_DISP32);
      // Emit stubs for static call.
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);                        // Displacement

  %}

  //   Following encoding is no longer used, but may be restored if calling
  //   convention changes significantly.
  //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //     // int ic_reg     = Matcher::inline_cache_reg();
  //     // int ic_encode  = Matcher::_regEncode[ic_reg];
  //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
  //     // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
  //     // // so we load it immediately before the call
  //     // emit_opcode(cbuf, 0x8B);  // MOV    imo_reg,ic_reg  # method_oop
  //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode );  // R/M byte
  //
  //     // xor rbp,ebp
  //     emit_opcode(cbuf, 0x33);
  //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //     // CALL to interpreter.
  //     cbuf.set_insts_mark();
  //     $$$emit8$primary;
  //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
  //   %}

  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the low 32 bits of a long immediate; a zero becomes XOR dst,dst.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the high 32 bits of a long immediate into the pair's high register
  // (encoded here as low-register number + 2 -- TODO confirm pairing scheme).
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // NOTE(review): duplicate of the RegReg enc_class defined earlier in this
  // encode block (same name, same body) -- candidate for removal upstream.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // Emit the LOCK prefix only on multiprocessor systems.
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx  (restore original register contents)
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // 32-bit locked compare-and-exchange.
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // 8-bit locked compare-and-exchange.
  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB0);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // 16-bit locked compare-and-exchange (0x66 operand-size prefix).
  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // 16-bit mode
    emit_opcode(cbuf, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize ZF!=0 as a 0/1 boolean in 'res' (branch around the MOV res,1).
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Shift a long left/right by 1..31: SHLD/SHRD across the pair, then shift
  // the remaining half.  $tertiary == 0xA4 selects the SHLD (left) operand order.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    // NOTE(review): $primary emitted via emit_d8 here (single opcode byte) --
    // presumably equivalent to emit_opcode for a one-byte opcode; confirm.
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic shift right of a long by 32..63: copy hi into lo, shift lo by
  // (cnt-32), then sign-fill hi with SAR hi,31.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical long shift by 32..63: move one half into the other, shift it by
  // (cnt-32), and clear the vacated half.  $secondary picks the direction.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // dst = min(dst,src): compare, branch over the MOV when dst < src already.
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst,src): compare, branch over the MOV when dst > src already.
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free conditional add: p += (p < q) ? y : 0, via SBB mask.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp  (tmp = all-ones if borrow, else zero)
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // 64-bit left shift by a variable count in ECX; counts >= 32 move lo into
  // hi and clear lo before the SHLD/SHL pair handles count mod 32.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // 64-bit logical right shift by a variable count in ECX; counts >= 32 move
  // hi into lo and clear hi before the SHRD/SHR pair handles count mod 32.
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // 64-bit arithmetic right shift by a variable count in ECX; counts >= 32
  // move hi into lo and sign-fill hi (SAR hi,31) before SHRD/SAR.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!!  equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // strictfp: multiply by bias constant 1 to bring subnormals into range.
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // strictfp: multiply by bias constant 2 to undo the bias applied above.
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;                      // FST (no pop) when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: pop the copy we just pushed
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;                      // FST (no pop) when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: pop the copy we just pushed
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Push src1 then src0 from XMM onto the x87 stack via a memory temp.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Float variant of Push_ModD_encoding (4-byte stack temp).
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 TOS double into an XMM register and release the stack temp.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Pop the x87 TOS float into an XMM register; d8 is the temp size to release.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Push a single XMM double onto the x87 stack via a new stack temp.
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Copy an XMM double to the x87 stack, reusing the existing stack temp.
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp  ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf,
0x66 ); // operand-size prefix for 16-bit immediate 2681 emit_opcode( cbuf, 0xA9 ); 2682 emit_d16 ( cbuf, 0x0400 ); 2683 // // // This sequence works, but stalls for 12-16 cycles on PPro 2684 // // test rax,0x0400 2685 // emit_opcode( cbuf, 0xA9 ); 2686 // emit_d32 ( cbuf, 0x00000400 ); 2687 // 2688 // jz exit (no unordered comparison) 2689 emit_opcode( cbuf, 0x74 ); 2690 emit_d8 ( cbuf, 0x02 ); 2691 // mov ah,1 - treat as LT case (set carry flag) 2692 emit_opcode( cbuf, 0xB4 ); 2693 emit_d8 ( cbuf, 0x01 ); 2694 // sahf 2695 emit_opcode( cbuf, 0x9E); 2696 %} 2697 2698 enc_class cmpF_P6_fixup() %{ 2699 // Fixup the integer flags in case comparison involved a NaN 2700 // 2701 // JNP exit (no unordered comparison, P-flag is set by NaN) 2702 emit_opcode( cbuf, 0x7B ); 2703 emit_d8 ( cbuf, 0x03 ); 2704 // MOV AH,1 - treat as LT case (set carry flag) 2705 emit_opcode( cbuf, 0xB4 ); 2706 emit_d8 ( cbuf, 0x01 ); 2707 // SAHF 2708 emit_opcode( cbuf, 0x9E); 2709 // NOP // target for branch to avoid branch to branch 2710 emit_opcode( cbuf, 0x90); 2711 %} 2712 2713 // fnstsw_ax(); 2714 // sahf(); 2715 // movl(dst, nan_result); 2716 // jcc(Assembler::parity, exit); 2717 // movl(dst, less_result); 2718 // jcc(Assembler::below, exit); 2719 // movl(dst, equal_result); 2720 // jcc(Assembler::equal, exit); 2721 // movl(dst, greater_result); 2722 2723 // less_result = 1; 2724 // greater_result = -1; 2725 // equal_result = 0; 2726 // nan_result = -1; 2727 2728 enc_class CmpF_Result(rRegI dst) %{ 2729 // fnstsw_ax(); 2730 emit_opcode( cbuf, 0xDF); 2731 emit_opcode( cbuf, 0xE0); 2732 // sahf 2733 emit_opcode( cbuf, 0x9E); 2734 // movl(dst, nan_result); 2735 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2736 emit_d32( cbuf, -1 ); 2737 // jcc(Assembler::parity, exit); 2738 emit_opcode( cbuf, 0x7A ); 2739 emit_d8 ( cbuf, 0x13 ); 2740 // movl(dst, less_result); 2741 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2742 emit_d32( cbuf, -1 ); 2743 // jcc(Assembler::below, exit); 2744 emit_opcode( cbuf, 0x72 ); 
2745 emit_d8 ( cbuf, 0x0C ); 2746 // movl(dst, equal_result); 2747 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2748 emit_d32( cbuf, 0 ); 2749 // jcc(Assembler::equal, exit); 2750 emit_opcode( cbuf, 0x74 ); 2751 emit_d8 ( cbuf, 0x05 ); 2752 // movl(dst, greater_result); 2753 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2754 emit_d32( cbuf, 1 ); 2755 %} 2756 2757 2758 // Compare the longs and set flags 2759 // BROKEN! Do Not use as-is 2760 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2761 // CMP $src1.hi,$src2.hi 2762 emit_opcode( cbuf, 0x3B ); 2763 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2764 // JNE,s done 2765 emit_opcode(cbuf,0x75); 2766 emit_d8(cbuf, 2 ); 2767 // CMP $src1.lo,$src2.lo 2768 emit_opcode( cbuf, 0x3B ); 2769 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2770 // done: 2771 %} 2772 2773 enc_class convert_int_long( regL dst, rRegI src ) %{ 2774 // mov $dst.lo,$src 2775 int dst_encoding = $dst$$reg; 2776 int src_encoding = $src$$reg; 2777 encode_Copy( cbuf, dst_encoding , src_encoding ); 2778 // mov $dst.hi,$src 2779 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2780 // sar $dst.hi,31 2781 emit_opcode( cbuf, 0xC1 ); 2782 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2783 emit_d8(cbuf, 0x1F ); 2784 %} 2785 2786 enc_class convert_long_double( eRegL src ) %{ 2787 // push $src.hi 2788 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2789 // push $src.lo 2790 emit_opcode(cbuf, 0x50+$src$$reg ); 2791 // fild 64-bits at [SP] 2792 emit_opcode(cbuf,0xdf); 2793 emit_d8(cbuf, 0x6C); 2794 emit_d8(cbuf, 0x24); 2795 emit_d8(cbuf, 0x00); 2796 // pop stack 2797 emit_opcode(cbuf, 0x83); // add SP, #8 2798 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2799 emit_d8(cbuf, 0x8); 2800 %} 2801 2802 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2803 // IMUL EDX:EAX,$src1 2804 emit_opcode( cbuf, 0xF7 ); 2805 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2806 // SAR 
EDX,$cnt-32 2807 int shift_count = ((int)$cnt$$constant) - 32; 2808 if (shift_count > 0) { 2809 emit_opcode(cbuf, 0xC1); 2810 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2811 emit_d8(cbuf, shift_count); 2812 } 2813 %} 2814 2815 // this version doesn't have add sp, 8 2816 enc_class convert_long_double2( eRegL src ) %{ 2817 // push $src.hi 2818 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2819 // push $src.lo 2820 emit_opcode(cbuf, 0x50+$src$$reg ); 2821 // fild 64-bits at [SP] 2822 emit_opcode(cbuf,0xdf); 2823 emit_d8(cbuf, 0x6C); 2824 emit_d8(cbuf, 0x24); 2825 emit_d8(cbuf, 0x00); 2826 %} 2827 2828 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2829 // Basic idea: long = (long)int * (long)int 2830 // IMUL EDX:EAX, src 2831 emit_opcode( cbuf, 0xF7 ); 2832 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2833 %} 2834 2835 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2836 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2837 // MUL EDX:EAX, src 2838 emit_opcode( cbuf, 0xF7 ); 2839 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2840 %} 2841 2842 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2843 // Basic idea: lo(result) = lo(x_lo * y_lo) 2844 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2845 // MOV $tmp,$src.lo 2846 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2847 // IMUL $tmp,EDX 2848 emit_opcode( cbuf, 0x0F ); 2849 emit_opcode( cbuf, 0xAF ); 2850 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2851 // MOV EDX,$src.hi 2852 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2853 // IMUL EDX,EAX 2854 emit_opcode( cbuf, 0x0F ); 2855 emit_opcode( cbuf, 0xAF ); 2856 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2857 // ADD $tmp,EDX 2858 emit_opcode( cbuf, 0x03 ); 2859 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2860 // MUL EDX:EAX,$src.lo 2861 emit_opcode( cbuf, 0xF7 ); 2862 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2863 // ADD EDX,ESI 2864 emit_opcode( 
cbuf, 0x03 ); 2865 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2866 %} 2867 2868 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2869 // Basic idea: lo(result) = lo(src * y_lo) 2870 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2871 // IMUL $tmp,EDX,$src 2872 emit_opcode( cbuf, 0x6B ); 2873 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2874 emit_d8( cbuf, (int)$src$$constant ); 2875 // MOV EDX,$src 2876 emit_opcode(cbuf, 0xB8 + EDX_enc); 2877 emit_d32( cbuf, (int)$src$$constant ); 2878 // MUL EDX:EAX,EDX 2879 emit_opcode( cbuf, 0xF7 ); 2880 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2881 // ADD EDX,ESI 2882 emit_opcode( cbuf, 0x03 ); 2883 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2884 %} 2885 2886 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2887 // PUSH src1.hi 2888 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2889 // PUSH src1.lo 2890 emit_opcode(cbuf, 0x50+$src1$$reg ); 2891 // PUSH src2.hi 2892 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2893 // PUSH src2.lo 2894 emit_opcode(cbuf, 0x50+$src2$$reg ); 2895 // CALL directly to the runtime 2896 cbuf.set_insts_mark(); 2897 emit_opcode(cbuf,0xE8); // Call into runtime 2898 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2899 // Restore stack 2900 emit_opcode(cbuf, 0x83); // add SP, #framesize 2901 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2902 emit_d8(cbuf, 4*4); 2903 %} 2904 2905 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2906 // PUSH src1.hi 2907 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2908 // PUSH src1.lo 2909 emit_opcode(cbuf, 0x50+$src1$$reg ); 2910 // PUSH src2.hi 2911 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2912 // PUSH src2.lo 2913 emit_opcode(cbuf, 0x50+$src2$$reg ); 2914 // CALL directly to the runtime 2915 cbuf.set_insts_mark(); 2916 emit_opcode(cbuf,0xE8); // Call into runtime 2917 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2918 // Restore stack 2919 emit_opcode(cbuf, 0x83); // add SP, #framesize 2920 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2921 emit_d8(cbuf, 4*4); 2922 %} 2923 2924 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2925 // MOV $tmp,$src.lo 2926 emit_opcode(cbuf, 0x8B); 2927 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2928 // OR $tmp,$src.hi 2929 emit_opcode(cbuf, 0x0B); 2930 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2931 %} 2932 2933 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2934 // CMP $src1.lo,$src2.lo 2935 emit_opcode( cbuf, 0x3B ); 2936 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2937 // JNE,s skip 2938 emit_cc(cbuf, 0x70, 0x5); 2939 emit_d8(cbuf,2); 2940 // CMP $src1.hi,$src2.hi 2941 emit_opcode( cbuf, 0x3B ); 2942 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2943 %} 2944 2945 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2946 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2947 emit_opcode( cbuf, 0x3B ); 2948 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2949 // MOV $tmp,$src1.hi 2950 emit_opcode( cbuf, 0x8B ); 2951 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2952 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2953 emit_opcode( cbuf, 0x1B ); 2954 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2955 %} 2956 2957 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2958 // XOR $tmp,$tmp 2959 emit_opcode(cbuf,0x33); // XOR 2960 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2961 // CMP $tmp,$src.lo 2962 emit_opcode( cbuf, 0x3B ); 2963 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2964 // SBB $tmp,$src.hi 2965 emit_opcode( cbuf, 0x1B ); 2966 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2967 %} 2968 2969 // Sniff, sniff... 
smells like Gnu Superoptimizer 2970 enc_class neg_long( eRegL dst ) %{ 2971 emit_opcode(cbuf,0xF7); // NEG hi 2972 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2973 emit_opcode(cbuf,0xF7); // NEG lo 2974 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2975 emit_opcode(cbuf,0x83); // SBB hi,0 2976 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2977 emit_d8 (cbuf,0 ); 2978 %} 2979 2980 enc_class enc_pop_rdx() %{ 2981 emit_opcode(cbuf,0x5A); 2982 %} 2983 2984 enc_class enc_rethrow() %{ 2985 cbuf.set_insts_mark(); 2986 emit_opcode(cbuf, 0xE9); // jmp entry 2987 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2988 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2989 %} 2990 2991 2992 // Convert a double to an int. Java semantics require we do complex 2993 // manglelations in the corner cases. So we set the rounding mode to 2994 // 'zero', store the darned double down as an int, and reset the 2995 // rounding mode to 'nearest'. The hardware throws an exception which 2996 // patches up the correct value directly to the stack. 2997 enc_class DPR2I_encoding( regDPR src ) %{ 2998 // Flip to round-to-zero mode. We attempted to allow invalid-op 2999 // exceptions here, so that a NAN or other corner-case value will 3000 // thrown an exception (but normal values get converted at full speed). 3001 // However, I2C adapters and other float-stack manglers leave pending 3002 // invalid-op exceptions hanging. We would have to clear them before 3003 // enabling them and that is more expensive than just testing for the 3004 // invalid value Intel stores down in the corner cases. 3005 emit_opcode(cbuf,0xD9); // FLDCW trunc 3006 emit_opcode(cbuf,0x2D); 3007 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3008 // Allocate a word 3009 emit_opcode(cbuf,0x83); // SUB ESP,4 3010 emit_opcode(cbuf,0xEC); 3011 emit_d8(cbuf,0x04); 3012 // Encoding assumes a double has been pushed into FPR0. 
3013 // Store down the double as an int, popping the FPU stack 3014 emit_opcode(cbuf,0xDB); // FISTP [ESP] 3015 emit_opcode(cbuf,0x1C); 3016 emit_d8(cbuf,0x24); 3017 // Restore the rounding mode; mask the exception 3018 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3019 emit_opcode(cbuf,0x2D); 3020 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3021 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 3022 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3023 3024 // Load the converted int; adjust CPU stack 3025 emit_opcode(cbuf,0x58); // POP EAX 3026 emit_opcode(cbuf,0x3D); // CMP EAX,imm 3027 emit_d32 (cbuf,0x80000000); // 0x80000000 3028 emit_opcode(cbuf,0x75); // JNE around_slow_call 3029 emit_d8 (cbuf,0x07); // Size of slow_call 3030 // Push src onto stack slow-path 3031 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3032 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3033 // CALL directly to the runtime 3034 cbuf.set_insts_mark(); 3035 emit_opcode(cbuf,0xE8); // Call into runtime 3036 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3037 // Carry on here... 3038 %} 3039 3040 enc_class DPR2L_encoding( regDPR src ) %{ 3041 emit_opcode(cbuf,0xD9); // FLDCW trunc 3042 emit_opcode(cbuf,0x2D); 3043 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3044 // Allocate a word 3045 emit_opcode(cbuf,0x83); // SUB ESP,8 3046 emit_opcode(cbuf,0xEC); 3047 emit_d8(cbuf,0x08); 3048 // Encoding assumes a double has been pushed into FPR0. 3049 // Store down the double as a long, popping the FPU stack 3050 emit_opcode(cbuf,0xDF); // FISTP [ESP] 3051 emit_opcode(cbuf,0x3C); 3052 emit_d8(cbuf,0x24); 3053 // Restore the rounding mode; mask the exception 3054 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3055 emit_opcode(cbuf,0x2D); 3056 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3057 ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 3058 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3059 3060 // Load the converted int; adjust CPU stack 3061 emit_opcode(cbuf,0x58); // POP EAX 3062 emit_opcode(cbuf,0x5A); // POP EDX 3063 emit_opcode(cbuf,0x81); // CMP EDX,imm 3064 emit_d8 (cbuf,0xFA); // rdx 3065 emit_d32 (cbuf,0x80000000); // 0x80000000 3066 emit_opcode(cbuf,0x75); // JNE around_slow_call 3067 emit_d8 (cbuf,0x07+4); // Size of slow_call 3068 emit_opcode(cbuf,0x85); // TEST EAX,EAX 3069 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 3070 emit_opcode(cbuf,0x75); // JNE around_slow_call 3071 emit_d8 (cbuf,0x07); // Size of slow_call 3072 // Push src onto stack slow-path 3073 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3074 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3075 // CALL directly to the runtime 3076 cbuf.set_insts_mark(); 3077 emit_opcode(cbuf,0xE8); // Call into runtime 3078 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3079 // Carry on here... 
3080 %} 3081 3082 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 3083 // Operand was loaded from memory into fp ST (stack top) 3084 // FMUL ST,$src /* D8 C8+i */ 3085 emit_opcode(cbuf, 0xD8); 3086 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3087 %} 3088 3089 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3090 // FADDP ST,src2 /* D8 C0+i */ 3091 emit_opcode(cbuf, 0xD8); 3092 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3093 //could use FADDP src2,fpST /* DE C0+i */ 3094 %} 3095 3096 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3097 // FADDP src2,ST /* DE C0+i */ 3098 emit_opcode(cbuf, 0xDE); 3099 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3100 %} 3101 3102 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3103 // Operand has been loaded into fp ST (stack top) 3104 // FSUB ST,$src1 3105 emit_opcode(cbuf, 0xD8); 3106 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3107 3108 // FDIV 3109 emit_opcode(cbuf, 0xD8); 3110 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3111 %} 3112 3113 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3114 // Operand was loaded from memory into fp ST (stack top) 3115 // FADD ST,$src /* D8 C0+i */ 3116 emit_opcode(cbuf, 0xD8); 3117 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3118 3119 // FMUL ST,src2 /* D8 C*+i */ 3120 emit_opcode(cbuf, 0xD8); 3121 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3122 %} 3123 3124 3125 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3126 // Operand was loaded from memory into fp ST (stack top) 3127 // FADD ST,$src /* D8 C0+i */ 3128 emit_opcode(cbuf, 0xD8); 3129 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3130 3131 // FMULP src2,ST /* DE C8+i */ 3132 emit_opcode(cbuf, 0xDE); 3133 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3134 %} 3135 3136 // Atomically load the volatile long 3137 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3138 emit_opcode(cbuf,0xDF); 3139 int rm_byte_opcode = 0x05; 3140 int base = $mem$$base; 3141 int index = $mem$$index; 3142 int scale = $mem$$scale; 3143 int displace = $mem$$disp; 3144 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3145 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3146 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3147 %} 3148 3149 // Volatile Store Long. Must be atomic, so move it into 3150 // the FP TOS and then do a 64-bit FIST. Has to probe the 3151 // target address before the store (for null-ptr checks) 3152 // so the memory operand is used twice in the encoding. 3153 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3154 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3155 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3156 emit_opcode(cbuf,0xDF); 3157 int rm_byte_opcode = 0x07; 3158 int base = $mem$$base; 3159 int index = $mem$$index; 3160 int scale = $mem$$scale; 3161 int displace = $mem$$disp; 3162 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3163 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3164 %} 3165 3166 // Safepoint Poll. This polls the safepoint page, and causes an 3167 // exception if it is not readable. Unfortunately, it kills the condition code 3168 // in the process 3169 // We current use TESTL [spp],EDI 3170 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 3171 3172 enc_class Safepoint_Poll() %{ 3173 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); 3174 emit_opcode(cbuf,0x85); 3175 emit_rm (cbuf, 0x0, 0x7, 0x5); 3176 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 3177 %} 3178 %} 3179 3180 3181 //----------FRAME-------------------------------------------------------------- 3182 // Definition of frame structure and management information. 
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   | (to get allocators register number
//  G  Owned by    |        |        v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//        |        |        |  3
//        |        +--------+
//        V        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by    +--------+
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be nessecary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be nessecary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    // lo/hi tables are indexed by ideal register type (Op_RegI..Op_RegL).
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // Note: floats use UseSSE>=1 here (Java code), vs UseSSE>=2 in
  // c_return_value above (C code keeps floats on the FPU at UseSSE==1).
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer immediate that fits in a signed byte
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer immediate that fits in a sign-extended 16-bit field
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate -1 (all bits set)
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate whose value fits in a sign-extended 32-bit field
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero (x87 form, UseSSE<=1)
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 form)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero (x87 form)
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE form)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.
// Zero and not -0.0 (the bit-pattern check excludes the negative-zero encoding)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register: any allocatable 32-bit integer register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register (registers with byte-addressable subregisters)
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register, excluding EAX
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register, excluding EAX and EDX
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register, excluding ECX
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

//
// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register, excluding EAX
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register, excluding EAX and EBX
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long register: a pair of 32-bit integer registers
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands (x87 stack, UseSSE < 2)
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// XMM Double
// register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);  // no base register
    index(0x4);        // no index register
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand (pointer constant as displacement)
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.
// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// These avoid EBP as a base so implicit null checks stay safe (see
// eRegP_no_EBP above); op_cost(100) discourages their use otherwise.

// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed); encodings are the x86 condition-code nibbles
// used in Jcc/SETcc/CMOVcc opcodes.
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move; encodings are FCMOVcc opcode bytes
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o");   // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares; conditions are swapped relative
// to cmpOp because the operand order is commuted.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable, not fixed, size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation using big decoder
// (NOTE(review): src is declared as memory here — presumably intentional
// legacy signature; confirm before changing)
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation using big decoder
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation
pipe_class ialu_cr_reg_reg(eFlagsReg cr,
rRegI src1, rRegI src2) %{ 4856 single_instruction; 4857 cr : S4(write); 4858 src1 : S3(read); 4859 src2 : S3(read); 4860 DECODE : S0; // any decoder 4861 ALU : S3; // any alu 4862 %} 4863 4864 // Integer ALU reg-imm operation 4865 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4866 single_instruction; 4867 cr : S4(write); 4868 src1 : S3(read); 4869 DECODE : S0; // any decoder 4870 ALU : S3; // any alu 4871 %} 4872 4873 // Integer ALU reg-mem operation 4874 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4875 single_instruction; 4876 cr : S4(write); 4877 src1 : S3(read); 4878 src2 : S3(read); 4879 D0 : S0; // big decoder only 4880 ALU : S4; // any alu 4881 MEM : S3; 4882 %} 4883 4884 // Conditional move reg-reg 4885 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4886 instruction_count(4); 4887 y : S4(read); 4888 q : S3(read); 4889 p : S3(read); 4890 DECODE : S0(4); // any decoder 4891 %} 4892 4893 // Conditional move reg-reg 4894 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4895 single_instruction; 4896 dst : S4(write); 4897 src : S3(read); 4898 cr : S3(read); 4899 DECODE : S0; // any decoder 4900 %} 4901 4902 // Conditional move reg-mem 4903 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4904 single_instruction; 4905 dst : S4(write); 4906 src : S3(read); 4907 cr : S3(read); 4908 DECODE : S0; // any decoder 4909 MEM : S3; 4910 %} 4911 4912 // Conditional move reg-reg long 4913 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4914 single_instruction; 4915 dst : S4(write); 4916 src : S3(read); 4917 cr : S3(read); 4918 DECODE : S0(2); // any 2 decoders 4919 %} 4920 4921 // Conditional move double reg-reg 4922 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4923 single_instruction; 4924 dst : S4(write); 4925 src : S3(read); 4926 cr : S3(read); 4927 DECODE : S0; // any decoder 4928 %} 4929 4930 // Float reg-reg operation 4931 pipe_class fpu_reg(regDPR 
dst) %{ 4932 instruction_count(2); 4933 dst : S3(read); 4934 DECODE : S0(2); // any 2 decoders 4935 FPU : S3; 4936 %} 4937 4938 // Float reg-reg operation 4939 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4940 instruction_count(2); 4941 dst : S4(write); 4942 src : S3(read); 4943 DECODE : S0(2); // any 2 decoders 4944 FPU : S3; 4945 %} 4946 4947 // Float reg-reg operation 4948 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4949 instruction_count(3); 4950 dst : S4(write); 4951 src1 : S3(read); 4952 src2 : S3(read); 4953 DECODE : S0(3); // any 3 decoders 4954 FPU : S3(2); 4955 %} 4956 4957 // Float reg-reg operation 4958 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 4959 instruction_count(4); 4960 dst : S4(write); 4961 src1 : S3(read); 4962 src2 : S3(read); 4963 src3 : S3(read); 4964 DECODE : S0(4); // any 3 decoders 4965 FPU : S3(2); 4966 %} 4967 4968 // Float reg-reg operation 4969 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 4970 instruction_count(4); 4971 dst : S4(write); 4972 src1 : S3(read); 4973 src2 : S3(read); 4974 src3 : S3(read); 4975 DECODE : S1(3); // any 3 decoders 4976 D0 : S0; // Big decoder only 4977 FPU : S3(2); 4978 MEM : S3; 4979 %} 4980 4981 // Float reg-mem operation 4982 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 4983 instruction_count(2); 4984 dst : S5(write); 4985 mem : S3(read); 4986 D0 : S0; // big decoder only 4987 DECODE : S1; // any decoder for FPU POP 4988 FPU : S4; 4989 MEM : S3; // any mem 4990 %} 4991 4992 // Float reg-mem operation 4993 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 4994 instruction_count(3); 4995 dst : S5(write); 4996 src1 : S3(read); 4997 mem : S3(read); 4998 D0 : S0; // big decoder only 4999 DECODE : S1(2); // any decoder for FPU POP 5000 FPU : S4; 5001 MEM : S3; // any mem 5002 %} 5003 5004 // Float mem-reg operation 5005 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 5006 
instruction_count(2); 5007 src : S5(read); 5008 mem : S3(read); 5009 DECODE : S0; // any decoder for FPU PUSH 5010 D0 : S1; // big decoder only 5011 FPU : S4; 5012 MEM : S3; // any mem 5013 %} 5014 5015 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 5016 instruction_count(3); 5017 src1 : S3(read); 5018 src2 : S3(read); 5019 mem : S3(read); 5020 DECODE : S0(2); // any decoder for FPU PUSH 5021 D0 : S1; // big decoder only 5022 FPU : S4; 5023 MEM : S3; // any mem 5024 %} 5025 5026 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 5027 instruction_count(3); 5028 src1 : S3(read); 5029 src2 : S3(read); 5030 mem : S4(read); 5031 DECODE : S0; // any decoder for FPU PUSH 5032 D0 : S0(2); // big decoder only 5033 FPU : S4; 5034 MEM : S3(2); // any mem 5035 %} 5036 5037 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 5038 instruction_count(2); 5039 src1 : S3(read); 5040 dst : S4(read); 5041 D0 : S0(2); // big decoder only 5042 MEM : S3(2); // any mem 5043 %} 5044 5045 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 5046 instruction_count(3); 5047 src1 : S3(read); 5048 src2 : S3(read); 5049 dst : S4(read); 5050 D0 : S0(3); // big decoder only 5051 FPU : S4; 5052 MEM : S3(3); // any mem 5053 %} 5054 5055 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 5056 instruction_count(3); 5057 src1 : S4(read); 5058 mem : S4(read); 5059 DECODE : S0; // any decoder for FPU PUSH 5060 D0 : S0(2); // big decoder only 5061 FPU : S4; 5062 MEM : S3(2); // any mem 5063 %} 5064 5065 // Float load constant 5066 pipe_class fpu_reg_con(regDPR dst) %{ 5067 instruction_count(2); 5068 dst : S5(write); 5069 D0 : S0; // big decoder only for the load 5070 DECODE : S1; // any decoder for FPU POP 5071 FPU : S4; 5072 MEM : S3; // any mem 5073 %} 5074 5075 // Float load constant 5076 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5077 instruction_count(3); 5078 dst : S5(write); 5079 src : S3(read); 5080 D0 : S0; // big decoder only for 
the load 5081 DECODE : S1(2); // any decoder for FPU POP 5082 FPU : S4; 5083 MEM : S3; // any mem 5084 %} 5085 5086 // UnConditional branch 5087 pipe_class pipe_jmp( label labl ) %{ 5088 single_instruction; 5089 BR : S3; 5090 %} 5091 5092 // Conditional branch 5093 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5094 single_instruction; 5095 cr : S1(read); 5096 BR : S3; 5097 %} 5098 5099 // Allocation idiom 5100 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5101 instruction_count(1); force_serialization; 5102 fixed_latency(6); 5103 heap_ptr : S3(read); 5104 DECODE : S0(3); 5105 D0 : S2; 5106 MEM : S3; 5107 ALU : S3(2); 5108 dst : S5(write); 5109 BR : S5; 5110 %} 5111 5112 // Generic big/slow expanded idiom 5113 pipe_class pipe_slow( ) %{ 5114 instruction_count(10); multiple_bundles; force_serialization; 5115 fixed_latency(100); 5116 D0 : S0(2); 5117 MEM : S3(2); 5118 %} 5119 5120 // The real do-nothing guy 5121 pipe_class empty( ) %{ 5122 instruction_count(0); 5123 %} 5124 5125 // Define the class for the Nop node 5126 define %{ 5127 MachNop = empty; 5128 %} 5129 5130 %} 5131 5132 //----------INSTRUCTIONS------------------------------------------------------- 5133 // 5134 // match -- States which machine-independent subtree may be replaced 5135 // by this instruction. 5136 // ins_cost -- The estimated cost of this instruction is used by instruction 5137 // selection to identify a minimum cost tree of machine 5138 // instructions that matches a tree of machine-independent 5139 // instructions. 5140 // format -- A string providing the disassembly for this instruction. 5141 // The value of an instruction's operand may be inserted 5142 // by referring to it with a '$' prefix. 5143 // opcode -- Three instruction opcodes may be provided. These are referred 5144 // to within an encode class as $primary, $secondary, and $tertiary 5145 // respectively. 
//               The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
// Reverse the byte order of a 32-bit register in place.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse the byte order of a 64-bit pair: swap bytes within each half,
// then exchange the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Byte-reverse the low 16 bits, zero-extending: BSWAP moves them to the
// top of the register, then a logical shift brings them back down.
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Byte-reverse the low 16 bits, sign-extending: as above but with an
// arithmetic shift so the result's sign bit is propagated.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

// Count leading zeros, hardware LZCNT flavor.
instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Count leading zeros via BSR when LZCNT is unavailable.
// BSR returns the index of the highest set bit (undefined for zero input,
// hence the JNZ/MOV -1 fixup); 31 - index == leading-zero count.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Long leading-zero count with LZCNT: count the high word first; LZCNT
// sets carry when its input is zero, which redirects us to the low word.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long leading-zero count via BSR: try the high word, fall back to the
// low word (+32 bias handled by the final 63 - index computation).
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros, hardware TZCNT flavor.
instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros via BSF when TZCNT is unavailable; BSF is undefined
// for zero input, so substitute 32 in that case.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long trailing-zero count with TZCNT: count the low word first; TZCNT
// sets carry when its input is zero, which redirects us to the high word.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long trailing-zero count via BSF: low word first, then high word,
// substituting 32 when the high word is also zero (total 64).
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

// Population count of a 32-bit register (requires POPCNT support).
instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Population count directly from memory.
instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Long population count: sum the POPCNTs of the two 32-bit halves.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Long population count from memory: POPCNT each 32-bit half of the
// long ($mem and $mem+4) and sum them.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 bits of the mask matter after the zero-extending load.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    // The 0xFF mask lets the 16-bit load collapse to a single byte load.
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 bits of the mask matter after the zero-extending load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    // The 0xFF mask lets the 32-bit load collapse to a single byte load.
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    // The 0xFFFF mask lets the 32-bit load collapse to a 16-bit load.
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads: low word at $mem, high word at $mem+4.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
// Atomic 64-bit load via x87: FILD reads the 8 bytes in one access,
// FISTp spills them to the destination stack slot.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic 64-bit load via SSE2: a single MOVSD is atomic, result lands in a
// stack slot through a temporary XMM register.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Same as above but the result goes straight into an integer register pair:
// low 32 bits via MOVD, high 32 bits after a 32-bit logical right shift.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range (array length)
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 path)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): format says MOVLPD while the encoding calls movdbl();
// presumably movdbl() emits MOVLPD when !UseXmmLoadAndClearUpper — confirm
// against macroAssembler_x86.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 path)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address — one LEA variant per addressing-mode operand.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero — XOR is shorter/faster than MOV imm32 but clobbers flags.
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);               /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);               /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// 64-bit constant: two 32-bit immediate moves, one per register half.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
// Loads an x87 float constant from the constant table.
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// Load an SSE float constant from the constant table.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// XORPS is the cheapest way to zero an XMM register (no memory access).
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// Load an SSE2 double constant from the constant table.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load a long from a stack slot: two 32-bit moves (not atomic).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// One variant per AllocatePrefetchInstr setting; non-SSE CPUs get no-ops.

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short — 0x66 operand-size prefix makes the 0x89 MOV 16-bit.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long — two 32-bit moves, only legal when atomicity is not required.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer — only the low word of the long is stored.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 atomic volatile long store from a stack slot, via an XMM temp.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 atomic volatile long store from a register pair: pack the two 32-bit
// halves into one XMM register with PUNPCKLDQ, then store with one MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);               /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate — a plain byte store into the card table.
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87 path; source must already be at FP top-of-stack, regDPR1)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float (x87 path)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// (double->float conversion folded into the store; FST_S narrows to 32 bits)
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
// Stores the float's bit pattern as a 32-bit integer immediate.
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot — two 32-bit moves (not atomic).
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors. On x86 loads are not reordered with other loads
// and stores are not reordered with other stores, so acquire/release
// barriers need no instruction — only StoreLoad (volatile) needs one.

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier; emitted as a locked ADD to the stack.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// A preceding store already provides the StoreLoad ordering.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// Both operands constrained to EAX, so the cast is a pure no-op.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move emulated with a short branch for pre-P6 CPUs (no CMOV).
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // Fixed: the format comment previously said "# float" (copy-paste from the
  // float variant); this instruct moves a double (regD/MOVSD/movdbl).
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // Fixed: format comment said "# float" for a double move (see fcmovD_regS).
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# double\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: one CMOV per 32-bit half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00);         /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct incI_eReg(rRegI dst, immI1 src,
eFlagsReg cr) %{ 7100 predicate(UseIncDec); 7101 match(Set dst (AddI dst src)); 7102 effect(KILL cr); 7103 7104 size(1); 7105 format %{ "INC $dst" %} 7106 opcode(0x40); /* */ 7107 ins_encode( Opc_plus( primary, dst ) ); 7108 ins_pipe( ialu_reg ); 7109 %} 7110 7111 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 7112 match(Set dst (AddI src0 src1)); 7113 ins_cost(110); 7114 7115 format %{ "LEA $dst,[$src0 + $src1]" %} 7116 opcode(0x8D); /* 0x8D /r */ 7117 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7118 ins_pipe( ialu_reg_reg ); 7119 %} 7120 7121 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 7122 match(Set dst (AddP src0 src1)); 7123 ins_cost(110); 7124 7125 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 7126 opcode(0x8D); /* 0x8D /r */ 7127 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7128 ins_pipe( ialu_reg_reg ); 7129 %} 7130 7131 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 7132 predicate(UseIncDec); 7133 match(Set dst (AddI dst src)); 7134 effect(KILL cr); 7135 7136 size(1); 7137 format %{ "DEC $dst" %} 7138 opcode(0x48); /* */ 7139 ins_encode( Opc_plus( primary, dst ) ); 7140 ins_pipe( ialu_reg ); 7141 %} 7142 7143 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 7144 match(Set dst (AddP dst src)); 7145 effect(KILL cr); 7146 7147 size(2); 7148 format %{ "ADD $dst,$src" %} 7149 opcode(0x03); 7150 ins_encode( OpcP, RegReg( dst, src) ); 7151 ins_pipe( ialu_reg_reg ); 7152 %} 7153 7154 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 7155 match(Set dst (AddP dst src)); 7156 effect(KILL cr); 7157 7158 format %{ "ADD $dst,$src" %} 7159 opcode(0x81,0x00); /* Opcode 81 /0 id */ 7160 // ins_encode( RegImm( dst, src) ); 7161 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7162 ins_pipe( ialu_reg ); 7163 %} 7164 7165 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7166 match(Set dst (AddI dst (LoadI src))); 7167 effect(KILL cr); 7168 7169 ins_cost(125); 7170 format %{ "ADD $dst,$src" 
%} 7171 opcode(0x03); 7172 ins_encode( OpcP, RegMem( dst, src) ); 7173 ins_pipe( ialu_reg_mem ); 7174 %} 7175 7176 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7177 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7178 effect(KILL cr); 7179 7180 ins_cost(150); 7181 format %{ "ADD $dst,$src" %} 7182 opcode(0x01); /* Opcode 01 /r */ 7183 ins_encode( OpcP, RegMem( src, dst ) ); 7184 ins_pipe( ialu_mem_reg ); 7185 %} 7186 7187 // Add Memory with Immediate 7188 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7189 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7190 effect(KILL cr); 7191 7192 ins_cost(125); 7193 format %{ "ADD $dst,$src" %} 7194 opcode(0x81); /* Opcode 81 /0 id */ 7195 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7196 ins_pipe( ialu_mem_imm ); 7197 %} 7198 7199 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7200 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7201 effect(KILL cr); 7202 7203 ins_cost(125); 7204 format %{ "INC $dst" %} 7205 opcode(0xFF); /* Opcode FF /0 */ 7206 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7207 ins_pipe( ialu_mem_imm ); 7208 %} 7209 7210 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7211 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7212 effect(KILL cr); 7213 7214 ins_cost(125); 7215 format %{ "DEC $dst" %} 7216 opcode(0xFF); /* Opcode FF /1 */ 7217 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7218 ins_pipe( ialu_mem_imm ); 7219 %} 7220 7221 7222 instruct checkCastPP( eRegP dst ) %{ 7223 match(Set dst (CheckCastPP dst)); 7224 7225 size(0); 7226 format %{ "#checkcastPP of $dst" %} 7227 ins_encode( /*empty encoding*/ ); 7228 ins_pipe( empty ); 7229 %} 7230 7231 instruct castPP( eRegP dst ) %{ 7232 match(Set dst (CastPP dst)); 7233 format %{ "#castPP of $dst" %} 7234 ins_encode( /*empty encoding*/ ); 7235 ins_pipe( empty ); 7236 %} 7237 7238 instruct castII( rRegI dst ) %{ 7239 match(Set dst (CastII dst)); 7240 format %{ "#castII of $dst" %} 
7241 ins_encode( /*empty encoding*/ ); 7242 ins_cost(0); 7243 ins_pipe( empty ); 7244 %} 7245 7246 7247 // Load-locked - same as a regular pointer load when used with compare-swap 7248 instruct loadPLocked(eRegP dst, memory mem) %{ 7249 match(Set dst (LoadPLocked mem)); 7250 7251 ins_cost(125); 7252 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7253 opcode(0x8B); 7254 ins_encode( OpcP, RegMem(dst,mem)); 7255 ins_pipe( ialu_reg_mem ); 7256 %} 7257 7258 // Conditional-store of the updated heap-top. 7259 // Used during allocation of the shared heap. 7260 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7261 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7262 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7263 // EAX is killed if there is contention, but then it's also unused. 7264 // In the common case of no contention, EAX holds the new oop address. 7265 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7266 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7267 ins_pipe( pipe_cmpxchg ); 7268 %} 7269 7270 // Conditional-store of an int value. 7271 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 7272 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ 7273 match(Set cr (StoreIConditional mem (Binary oldval newval))); 7274 effect(KILL oldval); 7275 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} 7276 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); 7277 ins_pipe( pipe_cmpxchg ); 7278 %} 7279 7280 // Conditional-store of a long value. 7281 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 
// 64-bit conditional store via CMPXCHG8B; oldval lives in EDX:EAX,
// newval in EBX:ECX (see register-swap note in the encoding).
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high order word of the new value to store but
    //       our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS; requires CMPXCHG8B hardware support. Result is a 0/1 boolean
// derived from ZF by enc_flags_ne_to_boolean.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange variants return the witnessed value in oldval
// (CMPXCHG leaves it in EAX / EDX:EAX) instead of a boolean.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAdd with unused result: a plain locked ADD suffices.
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// XCHG is implicitly locked on x86, so no lock prefix is emitted here.
// Important to match to xRegI: only 8-bit regs.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05);  /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// 0 - x matched to a single NEG.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03);  // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into EAX only, feeding the high-multiply rules below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
//  (special case for shift by 32)
// The _kids predicate restricts the long constant to int range so the
// multiply can be done as a 32x32->64 IMUL.
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV  $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV  EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD  $tmp,EDX\n\t"
            "MUL  EDX:EAX,$src.lo\n\t"
            "ADD  EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV  $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL  EDX:EAX,$src.lo\n\t"
            "ADD  EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV  $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL  EDX:EAX,$src.lo\n\t"
            "ADD  EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL  EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
// Basic idea: lo(result) = lo(src * EAX)
//             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV  EDX,$src\n\t"
            "MUL  EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD  EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// The CMP/JNE preamble special-cases min_jint / -1, which would raise #DE
// (overflow) in hardware IDIV.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP    EAX,0x80000000\n\t"
            "JNE,s  normal\n\t"
            "XOR    EDX,EDX\n\t"
            "CMP    ECX,-1\n\t"
            "JE,s   done\n"
    "normal: CDQ\n\t"
            "IDIV   $div\n\t"
    "done:"        %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// 64-bit divide goes out-of-line to the runtime helper.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD  ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
// DivModI defines both EAX (quotient) and EDX (remainder), so neither is KILLed.
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP    EAX,0x80000000\n\t"
            "JNE,s  normal\n\t"
            "XOR    EDX,EDX\n\t"
            "CMP    ECX,-1\n\t"
            "JE,s   done\n"
    "normal: CDQ\n\t"
            "IDIV   $div\n\t"
    "done:"        %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV   $div" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD  ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
// Inline 64/32 signed divide by constant using two unsigned 32-bit DIVs;
// negatives are folded through lneg before/after.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR    $tmp2,$tmp2\n\t"
            "CMP    $tmp,EDX\n\t"
            "JA,s   fast\n\t"
            "MOV    $tmp2,EAX\n\t"
            "MOV    EAX,EDX\n\t"
            "MOV    EDX,0\n\t"
            "JLE,s  pos\n\t"
            "LNEG   EAX : $tmp2\n\t"
            "DIV    $tmp # unsigned division\n\t"
            "XCHG   EAX,$tmp2\n\t"
            "DIV    $tmp\n\t"
            "LNEG   $tmp2 : EAX\n\t"
            "JMP,s  done\n"
    "pos:\n\t"
            "DIV    $tmp\n\t"
            "XCHG   EAX,$tmp2\n"
    "fast:\n\t"
            "DIV    $tmp\n"
    "done:\n\t"
            "MOV    EDX,$tmp2\n\t"
            "NEG    EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP    $tmp,EDX\n\t"
            "JA,s   fast\n\t"
            "MOV    $tmp2,EAX\n\t"
            "MOV    EAX,EDX\n\t"
            "MOV    EDX,0\n\t"
            "JLE,s  pos\n\t"
            "LNEG   EAX : $tmp2\n\t"
            "DIV    $tmp # unsigned division\n\t"
            "MOV    EAX,$tmp2\n\t"
            "DIV    $tmp\n\t"
            "NEG    EDX\n\t"
            "JMP,s  done\n"
    "pos:\n\t"
            "DIV    $tmp\n\t"
            "MOV    EAX,$tmp2\n"
    "fast:\n\t"
            "DIV    $tmp\n"
    "done:\n\t"
            "MOV    EAX,EDX\n\t"
            "SAR    EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4);  /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8050 match(Set dst (LShiftI dst shift)); 8051 effect(KILL cr); 8052 8053 size(2); 8054 format %{ "SHL $dst,$shift" %} 8055 opcode(0xD3, 0x4); /* D3 /4 */ 8056 ins_encode( OpcP, RegOpc( dst ) ); 8057 ins_pipe( ialu_reg_reg ); 8058 %} 8059 8060 // Arithmetic shift right by one 8061 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8062 match(Set dst (RShiftI dst shift)); 8063 effect(KILL cr); 8064 8065 size(2); 8066 format %{ "SAR $dst,$shift" %} 8067 opcode(0xD1, 0x7); /* D1 /7 */ 8068 ins_encode( OpcP, RegOpc( dst ) ); 8069 ins_pipe( ialu_reg ); 8070 %} 8071 8072 // Arithmetic shift right by one 8073 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{ 8074 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 8075 effect(KILL cr); 8076 format %{ "SAR $dst,$shift" %} 8077 opcode(0xD1, 0x7); /* D1 /7 */ 8078 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 8079 ins_pipe( ialu_mem_imm ); 8080 %} 8081 8082 // Arithmetic Shift Right by 8-bit immediate 8083 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8084 match(Set dst (RShiftI dst shift)); 8085 effect(KILL cr); 8086 8087 size(3); 8088 format %{ "SAR $dst,$shift" %} 8089 opcode(0xC1, 0x7); /* C1 /7 ib */ 8090 ins_encode( RegOpcImm( dst, shift ) ); 8091 ins_pipe( ialu_mem_imm ); 8092 %} 8093 8094 // Arithmetic Shift Right by 8-bit immediate 8095 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 8096 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 8097 effect(KILL cr); 8098 8099 format %{ "SAR $dst,$shift" %} 8100 opcode(0xC1, 0x7); /* C1 /7 ib */ 8101 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 8102 ins_pipe( ialu_mem_imm ); 8103 %} 8104 8105 // Arithmetic Shift Right by variable 8106 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8107 match(Set dst (RShiftI dst shift)); 8108 effect(KILL cr); 8109 8110 size(2); 8111 format %{ "SAR $dst,$shift" %} 8112 
opcode(0xD3, 0x7); /* D3 /7 */ 8113 ins_encode( OpcP, RegOpc( dst ) ); 8114 ins_pipe( ialu_reg_reg ); 8115 %} 8116 8117 // Logical shift right by one 8118 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8119 match(Set dst (URShiftI dst shift)); 8120 effect(KILL cr); 8121 8122 size(2); 8123 format %{ "SHR $dst,$shift" %} 8124 opcode(0xD1, 0x5); /* D1 /5 */ 8125 ins_encode( OpcP, RegOpc( dst ) ); 8126 ins_pipe( ialu_reg ); 8127 %} 8128 8129 // Logical Shift Right by 8-bit immediate 8130 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8131 match(Set dst (URShiftI dst shift)); 8132 effect(KILL cr); 8133 8134 size(3); 8135 format %{ "SHR $dst,$shift" %} 8136 opcode(0xC1, 0x5); /* C1 /5 ib */ 8137 ins_encode( RegOpcImm( dst, shift) ); 8138 ins_pipe( ialu_reg ); 8139 %} 8140 8141 8142 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 8143 // This idiom is used by the compiler for the i2b bytecode. 8144 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 8145 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 8146 8147 size(3); 8148 format %{ "MOVSX $dst,$src :8" %} 8149 ins_encode %{ 8150 __ movsbl($dst$$Register, $src$$Register); 8151 %} 8152 ins_pipe(ialu_reg_reg); 8153 %} 8154 8155 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 8156 // This idiom is used by the compiler the i2s bytecode. 
// i2s: sign-extend the low 16 bits of src into dst (matches the
// (x << 16) >> 16 idiom emitted for the i2s bytecode) using MOVSX.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
// Shift count must be in ECX (eCXRegI) because SHR r/m32,CL hard-wires CL.
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  // OpcSErm picks the sign-extended imm8 form when the constant fits.
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register (read-modify-write the memory operand)
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate (read-modify-write the memory operand)
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2 (matches (src1 ^ -1) & src2).
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: dst = (-src) & src, i.e. isolate the lowest set bit of src.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: dst = (src - 1) ^ src, i.e. mask up to and including the
// lowest set bit of src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: dst = (src - 1) & src, i.e. clear the lowest set bit of src.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Same encoding, but the source is a pointer reinterpreted as an int.
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register (read-modify-write the memory operand)
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate (read-modify-write the memory operand)
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// The three instructs below are building blocks used only via 'expand'
// from the match rules that recognize the OR-of-two-shifts rotate idiom;
// they therefore have no match rule of their own.
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /* Opcode C1 /0 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

// Variable rotate: count is in ECX, so dst must avoid ECX (ncxRegI).
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate: only a true rotate, i.e. the two shift counts sum to 0 mod 32.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
// Mirror of the ROL expand block above, using the /1 opcode extension.
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Predicate: only a true rotate, i.e. the two shift counts sum to 0 mod 32.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// x ^ -1 == ~x, so emit a one-operand NOT; NOT does not write EFLAGS,
// hence no eFlagsReg kill here.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register (read-modify-write the memory operand)
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate (read-modify-write the memory operand)
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------
// Conv2B is expanded into a copy followed by NEG/ADC: after NEG dst,
// CF is set iff dst was non-zero, and ADC dst,src then folds that carry
// with src so the expansion yields the 0/non-zero boolean result.

// Expand-only helper: plain register copy, no match rule.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Expand-only helper: NEG dst; ADC dst,src (0x13 is ADC r32,r/m32).
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer flavor of the Conv2B expansion (same encodings, eRegP source).
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// CmpLTMask: dst = (p < q) ? -1 : 0, materialized via SETlt + NEG.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // NOTE(review): 'done' is declared but never used in this encoding.
    Label done;
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// Special case q == 0: (dst < 0) ? -1 : 0 is just an arithmetic
// shift of the sign bit across the whole register.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// Fused form: p = (p - q) + ((p < q) ? y : 0), done as SUB then a
// conditional ADD (the SUB both computes p-q and sets the flags).
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// Fused form: y = (p < q) ? y : 0, done as CMP then a conditional clear.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These instructs produce only condition codes (Set cr (Overflow...));
// the arithmetic result is discarded, which is why the operand that the
// instruction physically overwrites is marked USE_KILL.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtraction overflow can use CMP, which sets the same flags as SUB
// without clobbering op1 — hence no USE_KILL here.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Negation overflow check: 0 - op2 emitted as NEG (destroys op2).
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Immediate form uses the three-operand IMUL into a scratch register so
// neither source operand is destroyed.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// 64-bit values live in register pairs on ia32; long arithmetic is a
// low-half op followed by the carry-propagating op on the high half.

// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
8908 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8909 match(Set dst (SubL dst src)); 8910 effect(KILL cr); 8911 ins_cost(200); 8912 format %{ "SUB $dst.lo,$src.lo\n\t" 8913 "SBB $dst.hi,$src.hi" %} 8914 opcode(0x2B, 0x1B); 8915 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8916 ins_pipe( ialu_reg_reg_long ); 8917 %} 8918 8919 // Subtract Long Register with Immediate 8920 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8921 match(Set dst (SubL dst src)); 8922 effect(KILL cr); 8923 format %{ "SUB $dst.lo,$src.lo\n\t" 8924 "SBB $dst.hi,$src.hi" %} 8925 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8926 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8927 ins_pipe( ialu_reg_long ); 8928 %} 8929 8930 // Subtract Long Register with Memory 8931 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8932 match(Set dst (SubL dst (LoadL mem))); 8933 effect(KILL cr); 8934 ins_cost(125); 8935 format %{ "SUB $dst.lo,$mem\n\t" 8936 "SBB $dst.hi,$mem+4" %} 8937 opcode(0x2B, 0x1B); 8938 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8939 ins_pipe( ialu_reg_long_mem ); 8940 %} 8941 8942 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8943 match(Set dst (SubL zero dst)); 8944 effect(KILL cr); 8945 ins_cost(300); 8946 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8947 ins_encode( neg_long(dst) ); 8948 ins_pipe( ialu_reg_reg_long ); 8949 %} 8950 8951 // And Long Register with Register 8952 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8953 match(Set dst (AndL dst src)); 8954 effect(KILL cr); 8955 format %{ "AND $dst.lo,$src.lo\n\t" 8956 "AND $dst.hi,$src.hi" %} 8957 opcode(0x23,0x23); 8958 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8959 ins_pipe( ialu_reg_reg_long ); 8960 %} 8961 8962 // And Long Register with Immediate 8963 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8964 match(Set dst (AndL dst src)); 8965 effect(KILL 
cr); 8966 format %{ "AND $dst.lo,$src.lo\n\t" 8967 "AND $dst.hi,$src.hi" %} 8968 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8969 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8970 ins_pipe( ialu_reg_long ); 8971 %} 8972 8973 // And Long Register with Memory 8974 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8975 match(Set dst (AndL dst (LoadL mem))); 8976 effect(KILL cr); 8977 ins_cost(125); 8978 format %{ "AND $dst.lo,$mem\n\t" 8979 "AND $dst.hi,$mem+4" %} 8980 opcode(0x23, 0x23); 8981 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8982 ins_pipe( ialu_reg_long_mem ); 8983 %} 8984 8985 // BMI1 instructions 8986 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8987 match(Set dst (AndL (XorL src1 minus_1) src2)); 8988 predicate(UseBMI1Instructions); 8989 effect(KILL cr, TEMP dst); 8990 8991 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 8992 "ANDNL $dst.hi, $src1.hi, $src2.hi" 8993 %} 8994 8995 ins_encode %{ 8996 Register Rdst = $dst$$Register; 8997 Register Rsrc1 = $src1$$Register; 8998 Register Rsrc2 = $src2$$Register; 8999 __ andnl(Rdst, Rsrc1, Rsrc2); 9000 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 9001 %} 9002 ins_pipe(ialu_reg_reg_long); 9003 %} 9004 9005 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 9006 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 9007 predicate(UseBMI1Instructions); 9008 effect(KILL cr, TEMP dst); 9009 9010 ins_cost(125); 9011 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 9012 "ANDNL $dst.hi, $src1.hi, $src2+4" 9013 %} 9014 9015 ins_encode %{ 9016 Register Rdst = $dst$$Register; 9017 Register Rsrc1 = $src1$$Register; 9018 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 9019 9020 __ andnl(Rdst, Rsrc1, $src2$$Address); 9021 __ andnl(HIGH_FROM_LOW(Rdst), 
HIGH_FROM_LOW(Rsrc1), src2_hi); 9022 %} 9023 ins_pipe(ialu_reg_mem); 9024 %} 9025 9026 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 9027 match(Set dst (AndL (SubL imm_zero src) src)); 9028 predicate(UseBMI1Instructions); 9029 effect(KILL cr, TEMP dst); 9030 9031 format %{ "MOVL $dst.hi, 0\n\t" 9032 "BLSIL $dst.lo, $src.lo\n\t" 9033 "JNZ done\n\t" 9034 "BLSIL $dst.hi, $src.hi\n" 9035 "done:" 9036 %} 9037 9038 ins_encode %{ 9039 Label done; 9040 Register Rdst = $dst$$Register; 9041 Register Rsrc = $src$$Register; 9042 __ movl(HIGH_FROM_LOW(Rdst), 0); 9043 __ blsil(Rdst, Rsrc); 9044 __ jccb(Assembler::notZero, done); 9045 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9046 __ bind(done); 9047 %} 9048 ins_pipe(ialu_reg); 9049 %} 9050 9051 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 9052 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 9053 predicate(UseBMI1Instructions); 9054 effect(KILL cr, TEMP dst); 9055 9056 ins_cost(125); 9057 format %{ "MOVL $dst.hi, 0\n\t" 9058 "BLSIL $dst.lo, $src\n\t" 9059 "JNZ done\n\t" 9060 "BLSIL $dst.hi, $src+4\n" 9061 "done:" 9062 %} 9063 9064 ins_encode %{ 9065 Label done; 9066 Register Rdst = $dst$$Register; 9067 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9068 9069 __ movl(HIGH_FROM_LOW(Rdst), 0); 9070 __ blsil(Rdst, $src$$Address); 9071 __ jccb(Assembler::notZero, done); 9072 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 9073 __ bind(done); 9074 %} 9075 ins_pipe(ialu_reg_mem); 9076 %} 9077 9078 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9079 %{ 9080 match(Set dst (XorL (AddL src minus_1) src)); 9081 predicate(UseBMI1Instructions); 9082 effect(KILL cr, TEMP dst); 9083 9084 format %{ "MOVL $dst.hi, 0\n\t" 9085 "BLSMSKL $dst.lo, $src.lo\n\t" 9086 "JNC done\n\t" 9087 "BLSMSKL $dst.hi, $src.hi\n" 9088 "done:" 9089 %} 9090 9091 ins_encode %{ 9092 Label done; 
9093 Register Rdst = $dst$$Register; 9094 Register Rsrc = $src$$Register; 9095 __ movl(HIGH_FROM_LOW(Rdst), 0); 9096 __ blsmskl(Rdst, Rsrc); 9097 __ jccb(Assembler::carryClear, done); 9098 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9099 __ bind(done); 9100 %} 9101 9102 ins_pipe(ialu_reg); 9103 %} 9104 9105 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9106 %{ 9107 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 9108 predicate(UseBMI1Instructions); 9109 effect(KILL cr, TEMP dst); 9110 9111 ins_cost(125); 9112 format %{ "MOVL $dst.hi, 0\n\t" 9113 "BLSMSKL $dst.lo, $src\n\t" 9114 "JNC done\n\t" 9115 "BLSMSKL $dst.hi, $src+4\n" 9116 "done:" 9117 %} 9118 9119 ins_encode %{ 9120 Label done; 9121 Register Rdst = $dst$$Register; 9122 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9123 9124 __ movl(HIGH_FROM_LOW(Rdst), 0); 9125 __ blsmskl(Rdst, $src$$Address); 9126 __ jccb(Assembler::carryClear, done); 9127 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 9128 __ bind(done); 9129 %} 9130 9131 ins_pipe(ialu_reg_mem); 9132 %} 9133 9134 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9135 %{ 9136 match(Set dst (AndL (AddL src minus_1) src) ); 9137 predicate(UseBMI1Instructions); 9138 effect(KILL cr, TEMP dst); 9139 9140 format %{ "MOVL $dst.hi, $src.hi\n\t" 9141 "BLSRL $dst.lo, $src.lo\n\t" 9142 "JNC done\n\t" 9143 "BLSRL $dst.hi, $src.hi\n" 9144 "done:" 9145 %} 9146 9147 ins_encode %{ 9148 Label done; 9149 Register Rdst = $dst$$Register; 9150 Register Rsrc = $src$$Register; 9151 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9152 __ blsrl(Rdst, Rsrc); 9153 __ jccb(Assembler::carryClear, done); 9154 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9155 __ bind(done); 9156 %} 9157 9158 ins_pipe(ialu_reg); 9159 %} 9160 9161 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9162 %{ 9163 match(Set dst (AndL 
(AddL (LoadL src) minus_1) (LoadL src) )); 9164 predicate(UseBMI1Instructions); 9165 effect(KILL cr, TEMP dst); 9166 9167 ins_cost(125); 9168 format %{ "MOVL $dst.hi, $src+4\n\t" 9169 "BLSRL $dst.lo, $src\n\t" 9170 "JNC done\n\t" 9171 "BLSRL $dst.hi, $src+4\n" 9172 "done:" 9173 %} 9174 9175 ins_encode %{ 9176 Label done; 9177 Register Rdst = $dst$$Register; 9178 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9179 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9180 __ blsrl(Rdst, $src$$Address); 9181 __ jccb(Assembler::carryClear, done); 9182 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9183 __ bind(done); 9184 %} 9185 9186 ins_pipe(ialu_reg_mem); 9187 %} 9188 9189 // Or Long Register with Register 9190 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9191 match(Set dst (OrL dst src)); 9192 effect(KILL cr); 9193 format %{ "OR $dst.lo,$src.lo\n\t" 9194 "OR $dst.hi,$src.hi" %} 9195 opcode(0x0B,0x0B); 9196 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9197 ins_pipe( ialu_reg_reg_long ); 9198 %} 9199 9200 // Or Long Register with Immediate 9201 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9202 match(Set dst (OrL dst src)); 9203 effect(KILL cr); 9204 format %{ "OR $dst.lo,$src.lo\n\t" 9205 "OR $dst.hi,$src.hi" %} 9206 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9207 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9208 ins_pipe( ialu_reg_long ); 9209 %} 9210 9211 // Or Long Register with Memory 9212 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9213 match(Set dst (OrL dst (LoadL mem))); 9214 effect(KILL cr); 9215 ins_cost(125); 9216 format %{ "OR $dst.lo,$mem\n\t" 9217 "OR $dst.hi,$mem+4" %} 9218 opcode(0x0B,0x0B); 9219 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9220 ins_pipe( ialu_reg_long_mem ); 9221 %} 9222 9223 // Xor Long Register with Register 9224 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9225 
match(Set dst (XorL dst src)); 9226 effect(KILL cr); 9227 format %{ "XOR $dst.lo,$src.lo\n\t" 9228 "XOR $dst.hi,$src.hi" %} 9229 opcode(0x33,0x33); 9230 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9231 ins_pipe( ialu_reg_reg_long ); 9232 %} 9233 9234 // Xor Long Register with Immediate -1 9235 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9236 match(Set dst (XorL dst imm)); 9237 format %{ "NOT $dst.lo\n\t" 9238 "NOT $dst.hi" %} 9239 ins_encode %{ 9240 __ notl($dst$$Register); 9241 __ notl(HIGH_FROM_LOW($dst$$Register)); 9242 %} 9243 ins_pipe( ialu_reg_long ); 9244 %} 9245 9246 // Xor Long Register with Immediate 9247 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9248 match(Set dst (XorL dst src)); 9249 effect(KILL cr); 9250 format %{ "XOR $dst.lo,$src.lo\n\t" 9251 "XOR $dst.hi,$src.hi" %} 9252 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9253 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9254 ins_pipe( ialu_reg_long ); 9255 %} 9256 9257 // Xor Long Register with Memory 9258 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9259 match(Set dst (XorL dst (LoadL mem))); 9260 effect(KILL cr); 9261 ins_cost(125); 9262 format %{ "XOR $dst.lo,$mem\n\t" 9263 "XOR $dst.hi,$mem+4" %} 9264 opcode(0x33,0x33); 9265 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9266 ins_pipe( ialu_reg_long_mem ); 9267 %} 9268 9269 // Shift Left Long by 1 9270 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9271 predicate(UseNewLongLShift); 9272 match(Set dst (LShiftL dst cnt)); 9273 effect(KILL cr); 9274 ins_cost(100); 9275 format %{ "ADD $dst.lo,$dst.lo\n\t" 9276 "ADC $dst.hi,$dst.hi" %} 9277 ins_encode %{ 9278 __ addl($dst$$Register,$dst$$Register); 9279 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9280 %} 9281 ins_pipe( ialu_reg_long ); 9282 %} 9283 9284 // Shift Left Long by 2 9285 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9286 
predicate(UseNewLongLShift); 9287 match(Set dst (LShiftL dst cnt)); 9288 effect(KILL cr); 9289 ins_cost(100); 9290 format %{ "ADD $dst.lo,$dst.lo\n\t" 9291 "ADC $dst.hi,$dst.hi\n\t" 9292 "ADD $dst.lo,$dst.lo\n\t" 9293 "ADC $dst.hi,$dst.hi" %} 9294 ins_encode %{ 9295 __ addl($dst$$Register,$dst$$Register); 9296 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9297 __ addl($dst$$Register,$dst$$Register); 9298 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9299 %} 9300 ins_pipe( ialu_reg_long ); 9301 %} 9302 9303 // Shift Left Long by 3 9304 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9305 predicate(UseNewLongLShift); 9306 match(Set dst (LShiftL dst cnt)); 9307 effect(KILL cr); 9308 ins_cost(100); 9309 format %{ "ADD $dst.lo,$dst.lo\n\t" 9310 "ADC $dst.hi,$dst.hi\n\t" 9311 "ADD $dst.lo,$dst.lo\n\t" 9312 "ADC $dst.hi,$dst.hi\n\t" 9313 "ADD $dst.lo,$dst.lo\n\t" 9314 "ADC $dst.hi,$dst.hi" %} 9315 ins_encode %{ 9316 __ addl($dst$$Register,$dst$$Register); 9317 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9318 __ addl($dst$$Register,$dst$$Register); 9319 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9320 __ addl($dst$$Register,$dst$$Register); 9321 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9322 %} 9323 ins_pipe( ialu_reg_long ); 9324 %} 9325 9326 // Shift Left Long by 1-31 9327 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9328 match(Set dst (LShiftL dst cnt)); 9329 effect(KILL cr); 9330 ins_cost(200); 9331 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9332 "SHL $dst.lo,$cnt" %} 9333 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9334 ins_encode( move_long_small_shift(dst,cnt) ); 9335 ins_pipe( ialu_reg_long ); 9336 %} 9337 9338 // Shift Left Long by 32-63 9339 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9340 match(Set dst (LShiftL dst cnt)); 9341 effect(KILL cr); 9342 ins_cost(300); 9343 
format %{ "MOV $dst.hi,$dst.lo\n" 9344 "\tSHL $dst.hi,$cnt-32\n" 9345 "\tXOR $dst.lo,$dst.lo" %} 9346 opcode(0xC1, 0x4); /* C1 /4 ib */ 9347 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9348 ins_pipe( ialu_reg_long ); 9349 %} 9350 9351 // Shift Left Long by variable 9352 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9353 match(Set dst (LShiftL dst shift)); 9354 effect(KILL cr); 9355 ins_cost(500+200); 9356 size(17); 9357 format %{ "TEST $shift,32\n\t" 9358 "JEQ,s small\n\t" 9359 "MOV $dst.hi,$dst.lo\n\t" 9360 "XOR $dst.lo,$dst.lo\n" 9361 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9362 "SHL $dst.lo,$shift" %} 9363 ins_encode( shift_left_long( dst, shift ) ); 9364 ins_pipe( pipe_slow ); 9365 %} 9366 9367 // Shift Right Long by 1-31 9368 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9369 match(Set dst (URShiftL dst cnt)); 9370 effect(KILL cr); 9371 ins_cost(200); 9372 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9373 "SHR $dst.hi,$cnt" %} 9374 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9375 ins_encode( move_long_small_shift(dst,cnt) ); 9376 ins_pipe( ialu_reg_long ); 9377 %} 9378 9379 // Shift Right Long by 32-63 9380 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9381 match(Set dst (URShiftL dst cnt)); 9382 effect(KILL cr); 9383 ins_cost(300); 9384 format %{ "MOV $dst.lo,$dst.hi\n" 9385 "\tSHR $dst.lo,$cnt-32\n" 9386 "\tXOR $dst.hi,$dst.hi" %} 9387 opcode(0xC1, 0x5); /* C1 /5 ib */ 9388 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9389 ins_pipe( ialu_reg_long ); 9390 %} 9391 9392 // Shift Right Long by variable 9393 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9394 match(Set dst (URShiftL dst shift)); 9395 effect(KILL cr); 9396 ins_cost(600); 9397 size(17); 9398 format %{ "TEST $shift,32\n\t" 9399 "JEQ,s small\n\t" 9400 "MOV $dst.lo,$dst.hi\n\t" 9401 "XOR $dst.hi,$dst.hi\n" 9402 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9403 "SHR $dst.hi,$shift" %} 9404 ins_encode( 
shift_right_long( dst, shift ) ); 9405 ins_pipe( pipe_slow ); 9406 %} 9407 9408 // Shift Right Long by 1-31 9409 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9410 match(Set dst (RShiftL dst cnt)); 9411 effect(KILL cr); 9412 ins_cost(200); 9413 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9414 "SAR $dst.hi,$cnt" %} 9415 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9416 ins_encode( move_long_small_shift(dst,cnt) ); 9417 ins_pipe( ialu_reg_long ); 9418 %} 9419 9420 // Shift Right Long by 32-63 9421 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9422 match(Set dst (RShiftL dst cnt)); 9423 effect(KILL cr); 9424 ins_cost(300); 9425 format %{ "MOV $dst.lo,$dst.hi\n" 9426 "\tSAR $dst.lo,$cnt-32\n" 9427 "\tSAR $dst.hi,31" %} 9428 opcode(0xC1, 0x7); /* C1 /7 ib */ 9429 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9430 ins_pipe( ialu_reg_long ); 9431 %} 9432 9433 // Shift Right arithmetic Long by variable 9434 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9435 match(Set dst (RShiftL dst shift)); 9436 effect(KILL cr); 9437 ins_cost(600); 9438 size(18); 9439 format %{ "TEST $shift,32\n\t" 9440 "JEQ,s small\n\t" 9441 "MOV $dst.lo,$dst.hi\n\t" 9442 "SAR $dst.hi,31\n" 9443 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9444 "SAR $dst.hi,$shift" %} 9445 ins_encode( shift_right_arith_long( dst, shift ) ); 9446 ins_pipe( pipe_slow ); 9447 %} 9448 9449 9450 //----------Double Instructions------------------------------------------------ 9451 // Double Math 9452 9453 // Compare & branch 9454 9455 // P6 version of float compare, sets condition codes in EFLAGS 9456 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9457 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9458 match(Set cr (CmpD src1 src2)); 9459 effect(KILL rax); 9460 ins_cost(150); 9461 format %{ "FLD $src1\n\t" 9462 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9463 "JNP exit\n\t" 9464 "MOV ah,1 // saw a NaN, set CF\n\t" 9465 
"SAHF\n" 9466 "exit:\tNOP // avoid branch to branch" %} 9467 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9468 ins_encode( Push_Reg_DPR(src1), 9469 OpcP, RegOpc(src2), 9470 cmpF_P6_fixup ); 9471 ins_pipe( pipe_slow ); 9472 %} 9473 9474 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9475 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9476 match(Set cr (CmpD src1 src2)); 9477 ins_cost(150); 9478 format %{ "FLD $src1\n\t" 9479 "FUCOMIP ST,$src2 // P6 instruction" %} 9480 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9481 ins_encode( Push_Reg_DPR(src1), 9482 OpcP, RegOpc(src2)); 9483 ins_pipe( pipe_slow ); 9484 %} 9485 9486 // Compare & branch 9487 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9488 predicate(UseSSE<=1); 9489 match(Set cr (CmpD src1 src2)); 9490 effect(KILL rax); 9491 ins_cost(200); 9492 format %{ "FLD $src1\n\t" 9493 "FCOMp $src2\n\t" 9494 "FNSTSW AX\n\t" 9495 "TEST AX,0x400\n\t" 9496 "JZ,s flags\n\t" 9497 "MOV AH,1\t# unordered treat as LT\n" 9498 "flags:\tSAHF" %} 9499 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9500 ins_encode( Push_Reg_DPR(src1), 9501 OpcP, RegOpc(src2), 9502 fpu_flags); 9503 ins_pipe( pipe_slow ); 9504 %} 9505 9506 // Compare vs zero into -1,0,1 9507 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9508 predicate(UseSSE<=1); 9509 match(Set dst (CmpD3 src1 zero)); 9510 effect(KILL cr, KILL rax); 9511 ins_cost(280); 9512 format %{ "FTSTD $dst,$src1" %} 9513 opcode(0xE4, 0xD9); 9514 ins_encode( Push_Reg_DPR(src1), 9515 OpcS, OpcP, PopFPU, 9516 CmpF_Result(dst)); 9517 ins_pipe( pipe_slow ); 9518 %} 9519 9520 // Compare into -1,0,1 9521 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9522 predicate(UseSSE<=1); 9523 match(Set dst (CmpD3 src1 src2)); 9524 effect(KILL cr, KILL rax); 9525 ins_cost(300); 9526 format %{ "FCMPD $dst,$src1,$src2" %} 9527 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9528 ins_encode( 
Push_Reg_DPR(src1), 9529 OpcP, RegOpc(src2), 9530 CmpF_Result(dst)); 9531 ins_pipe( pipe_slow ); 9532 %} 9533 9534 // float compare and set condition codes in EFLAGS by XMM regs 9535 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9536 predicate(UseSSE>=2); 9537 match(Set cr (CmpD src1 src2)); 9538 ins_cost(145); 9539 format %{ "UCOMISD $src1,$src2\n\t" 9540 "JNP,s exit\n\t" 9541 "PUSHF\t# saw NaN, set CF\n\t" 9542 "AND [rsp], #0xffffff2b\n\t" 9543 "POPF\n" 9544 "exit:" %} 9545 ins_encode %{ 9546 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9547 emit_cmpfp_fixup(_masm); 9548 %} 9549 ins_pipe( pipe_slow ); 9550 %} 9551 9552 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9553 predicate(UseSSE>=2); 9554 match(Set cr (CmpD src1 src2)); 9555 ins_cost(100); 9556 format %{ "UCOMISD $src1,$src2" %} 9557 ins_encode %{ 9558 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9559 %} 9560 ins_pipe( pipe_slow ); 9561 %} 9562 9563 // float compare and set condition codes in EFLAGS by XMM regs 9564 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9565 predicate(UseSSE>=2); 9566 match(Set cr (CmpD src1 (LoadD src2))); 9567 ins_cost(145); 9568 format %{ "UCOMISD $src1,$src2\n\t" 9569 "JNP,s exit\n\t" 9570 "PUSHF\t# saw NaN, set CF\n\t" 9571 "AND [rsp], #0xffffff2b\n\t" 9572 "POPF\n" 9573 "exit:" %} 9574 ins_encode %{ 9575 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9576 emit_cmpfp_fixup(_masm); 9577 %} 9578 ins_pipe( pipe_slow ); 9579 %} 9580 9581 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9582 predicate(UseSSE>=2); 9583 match(Set cr (CmpD src1 (LoadD src2))); 9584 ins_cost(100); 9585 format %{ "UCOMISD $src1,$src2" %} 9586 ins_encode %{ 9587 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9588 %} 9589 ins_pipe( pipe_slow ); 9590 %} 9591 9592 // Compare into -1,0,1 in XMM 9593 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9594 predicate(UseSSE>=2); 9595 match(Set dst (CmpD3 src1 src2)); 
9596 effect(KILL cr); 9597 ins_cost(255); 9598 format %{ "UCOMISD $src1, $src2\n\t" 9599 "MOV $dst, #-1\n\t" 9600 "JP,s done\n\t" 9601 "JB,s done\n\t" 9602 "SETNE $dst\n\t" 9603 "MOVZB $dst, $dst\n" 9604 "done:" %} 9605 ins_encode %{ 9606 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9607 emit_cmpfp3(_masm, $dst$$Register); 9608 %} 9609 ins_pipe( pipe_slow ); 9610 %} 9611 9612 // Compare into -1,0,1 in XMM and memory 9613 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9614 predicate(UseSSE>=2); 9615 match(Set dst (CmpD3 src1 (LoadD src2))); 9616 effect(KILL cr); 9617 ins_cost(275); 9618 format %{ "UCOMISD $src1, $src2\n\t" 9619 "MOV $dst, #-1\n\t" 9620 "JP,s done\n\t" 9621 "JB,s done\n\t" 9622 "SETNE $dst\n\t" 9623 "MOVZB $dst, $dst\n" 9624 "done:" %} 9625 ins_encode %{ 9626 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9627 emit_cmpfp3(_masm, $dst$$Register); 9628 %} 9629 ins_pipe( pipe_slow ); 9630 %} 9631 9632 9633 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9634 predicate (UseSSE <=1); 9635 match(Set dst (SubD dst src)); 9636 9637 format %{ "FLD $src\n\t" 9638 "DSUBp $dst,ST" %} 9639 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9640 ins_cost(150); 9641 ins_encode( Push_Reg_DPR(src), 9642 OpcP, RegOpc(dst) ); 9643 ins_pipe( fpu_reg_reg ); 9644 %} 9645 9646 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9647 predicate (UseSSE <=1); 9648 match(Set dst (RoundDouble (SubD src1 src2))); 9649 ins_cost(250); 9650 9651 format %{ "FLD $src2\n\t" 9652 "DSUB ST,$src1\n\t" 9653 "FSTP_D $dst\t# D-round" %} 9654 opcode(0xD8, 0x5); 9655 ins_encode( Push_Reg_DPR(src2), 9656 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9657 ins_pipe( fpu_mem_reg_reg ); 9658 %} 9659 9660 9661 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9662 predicate (UseSSE <=1); 9663 match(Set dst (SubD dst (LoadD src))); 9664 ins_cost(150); 9665 9666 format %{ "FLD $src\n\t" 9667 "DSUBp $dst,ST" %} 9668 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9669 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9670 OpcP, RegOpc(dst) ); 9671 ins_pipe( fpu_reg_mem ); 9672 %} 9673 9674 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9675 predicate (UseSSE<=1); 9676 match(Set dst (AbsD src)); 9677 ins_cost(100); 9678 format %{ "FABS" %} 9679 opcode(0xE1, 0xD9); 9680 ins_encode( OpcS, OpcP ); 9681 ins_pipe( fpu_reg_reg ); 9682 %} 9683 9684 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9685 predicate(UseSSE<=1); 9686 match(Set dst (NegD src)); 9687 ins_cost(100); 9688 format %{ "FCHS" %} 9689 opcode(0xE0, 0xD9); 9690 ins_encode( OpcS, OpcP ); 9691 ins_pipe( fpu_reg_reg ); 9692 %} 9693 9694 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9695 predicate(UseSSE<=1); 9696 match(Set dst (AddD dst src)); 9697 format %{ "FLD $src\n\t" 9698 "DADD $dst,ST" %} 9699 size(4); 9700 ins_cost(150); 9701 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9702 ins_encode( Push_Reg_DPR(src), 9703 OpcP, RegOpc(dst) ); 9704 ins_pipe( fpu_reg_reg ); 9705 %} 9706 9707 9708 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9709 predicate(UseSSE<=1); 9710 match(Set dst (RoundDouble (AddD src1 src2))); 9711 ins_cost(250); 9712 9713 format %{ "FLD $src2\n\t" 9714 "DADD ST,$src1\n\t" 9715 "FSTP_D $dst\t# D-round" %} 9716 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9717 ins_encode( Push_Reg_DPR(src2), 9718 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9719 ins_pipe( fpu_mem_reg_reg ); 9720 %} 9721 9722 9723 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9724 predicate(UseSSE<=1); 9725 match(Set dst (AddD dst (LoadD src))); 9726 ins_cost(150); 9727 9728 format %{ "FLD $src\n\t" 9729 "DADDp $dst,ST" %} 9730 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9731 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9732 OpcP, RegOpc(dst) ); 9733 ins_pipe( fpu_reg_mem ); 9734 %} 9735 9736 // add-to-memory 9737 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9738 predicate(UseSSE<=1); 9739 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9740 ins_cost(150); 9741 9742 format %{ "FLD_D $dst\n\t" 9743 "DADD ST,$src\n\t" 9744 "FST_D $dst" %} 9745 opcode(0xDD, 0x0); 9746 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9747 Opcode(0xD8), RegOpc(src), 9748 set_instruction_start, 9749 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9750 ins_pipe( fpu_reg_mem ); 9751 %} 9752 9753 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9754 predicate(UseSSE<=1); 9755 match(Set dst (AddD dst con)); 9756 ins_cost(125); 9757 format %{ "FLD1\n\t" 9758 "DADDp $dst,ST" %} 9759 ins_encode %{ 9760 __ fld1(); 9761 __ faddp($dst$$reg); 9762 %} 9763 ins_pipe(fpu_reg); 9764 %} 9765 9766 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9767 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9768 match(Set dst (AddD dst con)); 9769 ins_cost(200); 9770 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9771 "DADDp $dst,ST" %} 9772 ins_encode %{ 9773 __ fld_d($constantaddress($con)); 9774 __ faddp($dst$$reg); 9775 %} 9776 ins_pipe(fpu_reg_mem); 9777 %} 9778 9779 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9780 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9781 match(Set dst (RoundDouble (AddD src con))); 9782 ins_cost(200); 9783 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9784 "DADD ST,$src\n\t" 9785 "FSTP_D $dst\t# D-round" %} 9786 ins_encode %{ 9787 __ fld_d($constantaddress($con)); 9788 __ fadd($src$$reg); 9789 __ fstp_d(Address(rsp, $dst$$disp)); 9790 %} 9791 ins_pipe(fpu_mem_reg_con); 9792 %} 9793 9794 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9795 predicate(UseSSE<=1); 9796 match(Set dst (MulD dst src)); 9797 format %{ "FLD $src\n\t" 9798 "DMULp $dst,ST" %} 9799 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9800 ins_cost(150); 9801 ins_encode( Push_Reg_DPR(src), 9802 OpcP, RegOpc(dst) ); 9803 ins_pipe( 
fpu_reg_reg ); 9804 %} 9805 9806 // Strict FP instruction biases argument before multiply then 9807 // biases result to avoid double rounding of subnormals. 9808 // 9809 // scale arg1 by multiplying arg1 by 2^(-15360) 9810 // load arg2 9811 // multiply scaled arg1 by arg2 9812 // rescale product by 2^(15360) 9813 // 9814 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9815 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9816 match(Set dst (MulD dst src)); 9817 ins_cost(1); // Select this instruction for all strict FP double multiplies 9818 9819 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9820 "DMULp $dst,ST\n\t" 9821 "FLD $src\n\t" 9822 "DMULp $dst,ST\n\t" 9823 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9824 "DMULp $dst,ST\n\t" %} 9825 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9826 ins_encode( strictfp_bias1(dst), 9827 Push_Reg_DPR(src), 9828 OpcP, RegOpc(dst), 9829 strictfp_bias2(dst) ); 9830 ins_pipe( fpu_reg_reg ); 9831 %} 9832 9833 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9834 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9835 match(Set dst (MulD dst con)); 9836 ins_cost(200); 9837 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9838 "DMULp $dst,ST" %} 9839 ins_encode %{ 9840 __ fld_d($constantaddress($con)); 9841 __ fmulp($dst$$reg); 9842 %} 9843 ins_pipe(fpu_reg_mem); 9844 %} 9845 9846 9847 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9848 predicate( UseSSE<=1 ); 9849 match(Set dst (MulD dst (LoadD src))); 9850 ins_cost(200); 9851 format %{ "FLD_D $src\n\t" 9852 "DMULp $dst,ST" %} 9853 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9854 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9855 OpcP, RegOpc(dst) ); 9856 ins_pipe( fpu_reg_mem ); 9857 %} 9858 9859 // 9860 // Cisc-alternate to reg-reg multiply 9861 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9862 predicate( UseSSE<=1 ); 9863 match(Set dst (MulD src (LoadD mem))); 9864 ins_cost(250); 9865 format %{ "FLD_D $mem\n\t" 9866 "DMUL ST,$src\n\t" 9867 "FSTP_D $dst" %} 9868 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9869 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9870 OpcReg_FPR(src), 9871 Pop_Reg_DPR(dst) ); 9872 ins_pipe( fpu_reg_reg_mem ); 9873 %} 9874 9875 9876 // MACRO3 -- addDPR a mulDPR 9877 // This instruction is a '2-address' instruction in that the result goes 9878 // back to src2. This eliminates a move from the macro; possibly the 9879 // register allocator will have to add it back (and maybe not). 9880 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9881 predicate( UseSSE<=1 ); 9882 match(Set src2 (AddD (MulD src0 src1) src2)); 9883 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9884 "DMUL ST,$src1\n\t" 9885 "DADDp $src2,ST" %} 9886 ins_cost(250); 9887 opcode(0xDD); /* LoadD DD /0 */ 9888 ins_encode( Push_Reg_FPR(src0), 9889 FMul_ST_reg(src1), 9890 FAddP_reg_ST(src2) ); 9891 ins_pipe( fpu_reg_reg_reg ); 9892 %} 9893 9894 9895 // MACRO3 -- subDPR a mulDPR 9896 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9897 predicate( UseSSE<=1 ); 9898 match(Set src2 (SubD (MulD src0 src1) src2)); 9899 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9900 "DMUL ST,$src1\n\t" 9901 "DSUBRp $src2,ST" %} 9902 ins_cost(250); 9903 ins_encode( Push_Reg_FPR(src0), 9904 FMul_ST_reg(src1), 9905 Opcode(0xDE), Opc_plus(0xE0,src2)); 9906 ins_pipe( fpu_reg_reg_reg ); 9907 %} 9908 9909 9910 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9911 predicate( UseSSE<=1 ); 9912 match(Set dst (DivD dst src)); 9913 9914 format %{ "FLD $src\n\t" 9915 "FDIVp $dst,ST" %} 9916 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9917 ins_cost(150); 9918 ins_encode( Push_Reg_DPR(src), 9919 OpcP, RegOpc(dst) ); 9920 ins_pipe( fpu_reg_reg ); 9921 %} 9922 9923 // Strict FP instruction biases argument before division then 9924 // biases 
result, to avoid double rounding of subnormals. 9925 // 9926 // scale dividend by multiplying dividend by 2^(-15360) 9927 // load divisor 9928 // divide scaled dividend by divisor 9929 // rescale quotient by 2^(15360) 9930 // 9931 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9933 match(Set dst (DivD dst src)); 9934 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9935 ins_cost(01); 9936 9937 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9938 "DMULp $dst,ST\n\t" 9939 "FLD $src\n\t" 9940 "FDIVp $dst,ST\n\t" 9941 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9942 "DMULp $dst,ST\n\t" %} 9943 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9944 ins_encode( strictfp_bias1(dst), 9945 Push_Reg_DPR(src), 9946 OpcP, RegOpc(dst), 9947 strictfp_bias2(dst) ); 9948 ins_pipe( fpu_reg_reg ); 9949 %} 9950 9951 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9952 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 9953 match(Set dst (RoundDouble (DivD src1 src2))); 9954 9955 format %{ "FLD $src1\n\t" 9956 "FDIV ST,$src2\n\t" 9957 "FSTP_D $dst\t# D-round" %} 9958 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 9959 ins_encode( Push_Reg_DPR(src1), 9960 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); 9961 ins_pipe( fpu_mem_reg_reg ); 9962 %} 9963 9964 9965 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 9966 predicate(UseSSE<=1); 9967 match(Set dst (ModD dst src)); 9968 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 9969 9970 format %{ "DMOD $dst,$src" %} 9971 ins_cost(250); 9972 ins_encode(Push_Reg_Mod_DPR(dst, src), 9973 emitModDPR(), 9974 Push_Result_Mod_DPR(src), 9975 Pop_Reg_DPR(dst)); 9976 ins_pipe( pipe_slow ); 9977 %} 9978 9979 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 9980 predicate(UseSSE>=2); 9981 match(Set dst (ModD src0 src1)); 
9982 effect(KILL rax, KILL cr); 9983 9984 format %{ "SUB ESP,8\t # DMOD\n" 9985 "\tMOVSD [ESP+0],$src1\n" 9986 "\tFLD_D [ESP+0]\n" 9987 "\tMOVSD [ESP+0],$src0\n" 9988 "\tFLD_D [ESP+0]\n" 9989 "loop:\tFPREM\n" 9990 "\tFWAIT\n" 9991 "\tFNSTSW AX\n" 9992 "\tSAHF\n" 9993 "\tJP loop\n" 9994 "\tFSTP_D [ESP+0]\n" 9995 "\tMOVSD $dst,[ESP+0]\n" 9996 "\tADD ESP,8\n" 9997 "\tFSTP ST0\t # Restore FPU Stack" 9998 %} 9999 ins_cost(250); 10000 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 10001 ins_pipe( pipe_slow ); 10002 %} 10003 10004 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 10005 predicate (UseSSE<=1); 10006 match(Set dst(AtanD dst src)); 10007 format %{ "DATA $dst,$src" %} 10008 opcode(0xD9, 0xF3); 10009 ins_encode( Push_Reg_DPR(src), 10010 OpcP, OpcS, RegOpc(dst) ); 10011 ins_pipe( pipe_slow ); 10012 %} 10013 10014 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 10015 predicate (UseSSE>=2); 10016 match(Set dst(AtanD dst src)); 10017 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 10018 format %{ "DATA $dst,$src" %} 10019 opcode(0xD9, 0xF3); 10020 ins_encode( Push_SrcD(src), 10021 OpcP, OpcS, Push_ResultD(dst) ); 10022 ins_pipe( pipe_slow ); 10023 %} 10024 10025 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 10026 predicate (UseSSE<=1); 10027 match(Set dst (SqrtD src)); 10028 format %{ "DSQRT $dst,$src" %} 10029 opcode(0xFA, 0xD9); 10030 ins_encode( Push_Reg_DPR(src), 10031 OpcS, OpcP, Pop_Reg_DPR(dst) ); 10032 ins_pipe( pipe_slow ); 10033 %} 10034 10035 //-------------Float Instructions------------------------------- 10036 // Float Math 10037 10038 // Code for float compare: 10039 // fcompp(); 10040 // fwait(); fnstsw_ax(); 10041 // sahf(); 10042 // movl(dst, unordered_result); 10043 // jcc(Assembler::parity, exit); 10044 // movl(dst, less_result); 10045 // jcc(Assembler::below, exit); 10046 // movl(dst, equal_result); 10047 // jcc(Assembler::equal, exit); 10048 // movl(dst, greater_result); 10049 // 
exit: 10050 10051 // P6 version of float compare, sets condition codes in EFLAGS 10052 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10053 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10054 match(Set cr (CmpF src1 src2)); 10055 effect(KILL rax); 10056 ins_cost(150); 10057 format %{ "FLD $src1\n\t" 10058 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10059 "JNP exit\n\t" 10060 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10061 "SAHF\n" 10062 "exit:\tNOP // avoid branch to branch" %} 10063 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10064 ins_encode( Push_Reg_DPR(src1), 10065 OpcP, RegOpc(src2), 10066 cmpF_P6_fixup ); 10067 ins_pipe( pipe_slow ); 10068 %} 10069 10070 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10071 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10072 match(Set cr (CmpF src1 src2)); 10073 ins_cost(100); 10074 format %{ "FLD $src1\n\t" 10075 "FUCOMIP ST,$src2 // P6 instruction" %} 10076 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10077 ins_encode( Push_Reg_DPR(src1), 10078 OpcP, RegOpc(src2)); 10079 ins_pipe( pipe_slow ); 10080 %} 10081 10082 10083 // Compare & branch 10084 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10085 predicate(UseSSE == 0); 10086 match(Set cr (CmpF src1 src2)); 10087 effect(KILL rax); 10088 ins_cost(200); 10089 format %{ "FLD $src1\n\t" 10090 "FCOMp $src2\n\t" 10091 "FNSTSW AX\n\t" 10092 "TEST AX,0x400\n\t" 10093 "JZ,s flags\n\t" 10094 "MOV AH,1\t# unordered treat as LT\n" 10095 "flags:\tSAHF" %} 10096 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10097 ins_encode( Push_Reg_DPR(src1), 10098 OpcP, RegOpc(src2), 10099 fpu_flags); 10100 ins_pipe( pipe_slow ); 10101 %} 10102 10103 // Compare vs zero into -1,0,1 10104 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10105 predicate(UseSSE == 0); 10106 match(Set dst (CmpF3 src1 zero)); 10107 effect(KILL cr, KILL rax); 10108 ins_cost(280); 10109 
// --- continuation of cmpFPR_0: FTST compares ST(0) against +0.0; the FPU
// condition bits travel through EAX (CmpF_Result) to yield -1/0/1 in $dst.
  format %{ "FTSTF $dst,$src1" %}
  // OpcS,OpcP emit 0xD9 0xE4 = FTST (secondary byte is emitted first here).
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
// Pure-x87 three-way float compare; CmpF_Result materializes the integer
// result, which is why EAX and EFLAGS are KILLed in the effect clause.
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// An unordered result sets PF; the fixup (see PUSHF/AND/POPF in the format)
// then forces CF so a NaN operand compares as "less than".
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Cheaper variant for eFlagsRegUCF consumers: no NaN fixup is emitted.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Same as cmpF_cc but with the right operand folded from memory (LoadF).
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister,
$src2$$Address); 10173 emit_cmpfp_fixup(_masm); 10174 %} 10175 ins_pipe( pipe_slow ); 10176 %} 10177 10178 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10179 predicate(UseSSE>=1); 10180 match(Set cr (CmpF src1 (LoadF src2))); 10181 ins_cost(100); 10182 format %{ "UCOMISS $src1,$src2" %} 10183 ins_encode %{ 10184 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10185 %} 10186 ins_pipe( pipe_slow ); 10187 %} 10188 10189 // Compare into -1,0,1 in XMM 10190 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10191 predicate(UseSSE>=1); 10192 match(Set dst (CmpF3 src1 src2)); 10193 effect(KILL cr); 10194 ins_cost(255); 10195 format %{ "UCOMISS $src1, $src2\n\t" 10196 "MOV $dst, #-1\n\t" 10197 "JP,s done\n\t" 10198 "JB,s done\n\t" 10199 "SETNE $dst\n\t" 10200 "MOVZB $dst, $dst\n" 10201 "done:" %} 10202 ins_encode %{ 10203 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10204 emit_cmpfp3(_masm, $dst$$Register); 10205 %} 10206 ins_pipe( pipe_slow ); 10207 %} 10208 10209 // Compare into -1,0,1 in XMM and memory 10210 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10211 predicate(UseSSE>=1); 10212 match(Set dst (CmpF3 src1 (LoadF src2))); 10213 effect(KILL cr); 10214 ins_cost(275); 10215 format %{ "UCOMISS $src1, $src2\n\t" 10216 "MOV $dst, #-1\n\t" 10217 "JP,s done\n\t" 10218 "JB,s done\n\t" 10219 "SETNE $dst\n\t" 10220 "MOVZB $dst, $dst\n" 10221 "done:" %} 10222 ins_encode %{ 10223 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10224 emit_cmpfp3(_masm, $dst$$Register); 10225 %} 10226 ins_pipe( pipe_slow ); 10227 %} 10228 10229 // Spill to obtain 24-bit precision 10230 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10231 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10232 match(Set dst (SubF src1 src2)); 10233 10234 format %{ "FSUB $dst,$src1 - $src2" %} 10235 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10236 ins_encode( Push_Reg_FPR(src1), 
10237 OpcReg_FPR(src2), 10238 Pop_Mem_FPR(dst) ); 10239 ins_pipe( fpu_mem_reg_reg ); 10240 %} 10241 // 10242 // This instruction does not round to 24-bits 10243 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10244 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10245 match(Set dst (SubF dst src)); 10246 10247 format %{ "FSUB $dst,$src" %} 10248 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10249 ins_encode( Push_Reg_FPR(src), 10250 OpcP, RegOpc(dst) ); 10251 ins_pipe( fpu_reg_reg ); 10252 %} 10253 10254 // Spill to obtain 24-bit precision 10255 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10256 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10257 match(Set dst (AddF src1 src2)); 10258 10259 format %{ "FADD $dst,$src1,$src2" %} 10260 opcode(0xD8, 0x0); /* D8 C0+i */ 10261 ins_encode( Push_Reg_FPR(src2), 10262 OpcReg_FPR(src1), 10263 Pop_Mem_FPR(dst) ); 10264 ins_pipe( fpu_mem_reg_reg ); 10265 %} 10266 // 10267 // This instruction does not round to 24-bits 10268 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10269 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10270 match(Set dst (AddF dst src)); 10271 10272 format %{ "FLD $src\n\t" 10273 "FADDp $dst,ST" %} 10274 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10275 ins_encode( Push_Reg_FPR(src), 10276 OpcP, RegOpc(dst) ); 10277 ins_pipe( fpu_reg_reg ); 10278 %} 10279 10280 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10281 predicate(UseSSE==0); 10282 match(Set dst (AbsF src)); 10283 ins_cost(100); 10284 format %{ "FABS" %} 10285 opcode(0xE1, 0xD9); 10286 ins_encode( OpcS, OpcP ); 10287 ins_pipe( fpu_reg_reg ); 10288 %} 10289 10290 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10291 predicate(UseSSE==0); 10292 match(Set dst (NegF src)); 10293 ins_cost(100); 10294 format %{ "FCHS" %} 10295 opcode(0xE0, 0xD9); 10296 ins_encode( OpcS, OpcP ); 10297 ins_pipe( fpu_reg_reg ); 10298 %} 10299 10300 // Cisc-alternate to addFPR_reg 10301 // Spill to 
obtain 24-bit precision 10302 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10303 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10304 match(Set dst (AddF src1 (LoadF src2))); 10305 10306 format %{ "FLD $src2\n\t" 10307 "FADD ST,$src1\n\t" 10308 "FSTP_S $dst" %} 10309 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10310 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10311 OpcReg_FPR(src1), 10312 Pop_Mem_FPR(dst) ); 10313 ins_pipe( fpu_mem_reg_mem ); 10314 %} 10315 // 10316 // Cisc-alternate to addFPR_reg 10317 // This instruction does not round to 24-bits 10318 instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10319 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10320 match(Set dst (AddF dst (LoadF src))); 10321 10322 format %{ "FADD $dst,$src" %} 10323 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10324 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10325 OpcP, RegOpc(dst) ); 10326 ins_pipe( fpu_reg_mem ); 10327 %} 10328 10329 // // Following two instructions for _222_mpegaudio 10330 // Spill to obtain 24-bit precision 10331 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10332 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10333 match(Set dst (AddF src1 src2)); 10334 10335 format %{ "FADD $dst,$src1,$src2" %} 10336 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10337 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10338 OpcReg_FPR(src2), 10339 Pop_Mem_FPR(dst) ); 10340 ins_pipe( fpu_mem_reg_mem ); 10341 %} 10342 10343 // Cisc-spill variant 10344 // Spill to obtain 24-bit precision 10345 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10346 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10347 match(Set dst (AddF src1 (LoadF src2))); 10348 10349 format %{ "FADD $dst,$src1,$src2 cisc" %} 10350 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10351 
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10352 set_instruction_start, 10353 OpcP, RMopc_Mem(secondary,src1), 10354 Pop_Mem_FPR(dst) ); 10355 ins_pipe( fpu_mem_mem_mem ); 10356 %} 10357 10358 // Spill to obtain 24-bit precision 10359 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10360 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10361 match(Set dst (AddF src1 src2)); 10362 10363 format %{ "FADD $dst,$src1,$src2" %} 10364 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10365 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10366 set_instruction_start, 10367 OpcP, RMopc_Mem(secondary,src1), 10368 Pop_Mem_FPR(dst) ); 10369 ins_pipe( fpu_mem_mem_mem ); 10370 %} 10371 10372 10373 // Spill to obtain 24-bit precision 10374 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10375 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10376 match(Set dst (AddF src con)); 10377 format %{ "FLD $src\n\t" 10378 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10379 "FSTP_S $dst" %} 10380 ins_encode %{ 10381 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10382 __ fadd_s($constantaddress($con)); 10383 __ fstp_s(Address(rsp, $dst$$disp)); 10384 %} 10385 ins_pipe(fpu_mem_reg_con); 10386 %} 10387 // 10388 // This instruction does not round to 24-bits 10389 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10390 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10391 match(Set dst (AddF src con)); 10392 format %{ "FLD $src\n\t" 10393 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10394 "FSTP $dst" %} 10395 ins_encode %{ 10396 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10397 __ fadd_s($constantaddress($con)); 10398 __ fstp_d($dst$$reg); 10399 %} 10400 ins_pipe(fpu_reg_reg_con); 10401 %} 10402 10403 // Spill to obtain 24-bit precision 10404 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10405 
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10406 match(Set dst (MulF src1 src2)); 10407 10408 format %{ "FLD $src1\n\t" 10409 "FMUL $src2\n\t" 10410 "FSTP_S $dst" %} 10411 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10412 ins_encode( Push_Reg_FPR(src1), 10413 OpcReg_FPR(src2), 10414 Pop_Mem_FPR(dst) ); 10415 ins_pipe( fpu_mem_reg_reg ); 10416 %} 10417 // 10418 // This instruction does not round to 24-bits 10419 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10420 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10421 match(Set dst (MulF src1 src2)); 10422 10423 format %{ "FLD $src1\n\t" 10424 "FMUL $src2\n\t" 10425 "FSTP_S $dst" %} 10426 opcode(0xD8, 0x1); /* D8 C8+i */ 10427 ins_encode( Push_Reg_FPR(src2), 10428 OpcReg_FPR(src1), 10429 Pop_Reg_FPR(dst) ); 10430 ins_pipe( fpu_reg_reg_reg ); 10431 %} 10432 10433 10434 // Spill to obtain 24-bit precision 10435 // Cisc-alternate to reg-reg multiply 10436 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10437 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10438 match(Set dst (MulF src1 (LoadF src2))); 10439 10440 format %{ "FLD_S $src2\n\t" 10441 "FMUL $src1\n\t" 10442 "FSTP_S $dst" %} 10443 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10444 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10445 OpcReg_FPR(src1), 10446 Pop_Mem_FPR(dst) ); 10447 ins_pipe( fpu_mem_reg_mem ); 10448 %} 10449 // 10450 // This instruction does not round to 24-bits 10451 // Cisc-alternate to reg-reg multiply 10452 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10453 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10454 match(Set dst (MulF src1 (LoadF src2))); 10455 10456 format %{ "FMUL $dst,$src1,$src2" %} 10457 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10458 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10459 OpcReg_FPR(src1), 10460 
Pop_Reg_FPR(dst) ); 10461 ins_pipe( fpu_reg_reg_mem ); 10462 %} 10463 10464 // Spill to obtain 24-bit precision 10465 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10466 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10467 match(Set dst (MulF src1 src2)); 10468 10469 format %{ "FMUL $dst,$src1,$src2" %} 10470 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10471 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10472 set_instruction_start, 10473 OpcP, RMopc_Mem(secondary,src1), 10474 Pop_Mem_FPR(dst) ); 10475 ins_pipe( fpu_mem_mem_mem ); 10476 %} 10477 10478 // Spill to obtain 24-bit precision 10479 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10480 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10481 match(Set dst (MulF src con)); 10482 10483 format %{ "FLD $src\n\t" 10484 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10485 "FSTP_S $dst" %} 10486 ins_encode %{ 10487 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10488 __ fmul_s($constantaddress($con)); 10489 __ fstp_s(Address(rsp, $dst$$disp)); 10490 %} 10491 ins_pipe(fpu_mem_reg_con); 10492 %} 10493 // 10494 // This instruction does not round to 24-bits 10495 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10496 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10497 match(Set dst (MulF src con)); 10498 10499 format %{ "FLD $src\n\t" 10500 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10501 "FSTP $dst" %} 10502 ins_encode %{ 10503 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10504 __ fmul_s($constantaddress($con)); 10505 __ fstp_d($dst$$reg); 10506 %} 10507 ins_pipe(fpu_reg_reg_con); 10508 %} 10509 10510 10511 // 10512 // MACRO1 -- subsume unshared load into mulFPR 10513 // This instruction does not round to 24-bits 10514 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10515 predicate(UseSSE==0 && 
!Compile::current()->select_24_bit_instr()); 10516 match(Set dst (MulF (LoadF mem1) src)); 10517 10518 format %{ "FLD $mem1 ===MACRO1===\n\t" 10519 "FMUL ST,$src\n\t" 10520 "FSTP $dst" %} 10521 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10522 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10523 OpcReg_FPR(src), 10524 Pop_Reg_FPR(dst) ); 10525 ins_pipe( fpu_reg_reg_mem ); 10526 %} 10527 // 10528 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10529 // This instruction does not round to 24-bits 10530 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10531 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10532 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10533 ins_cost(95); 10534 10535 format %{ "FLD $mem1 ===MACRO2===\n\t" 10536 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10537 "FADD ST,$src2\n\t" 10538 "FSTP $dst" %} 10539 opcode(0xD9); /* LoadF D9 /0 */ 10540 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10541 FMul_ST_reg(src1), 10542 FAdd_ST_reg(src2), 10543 Pop_Reg_FPR(dst) ); 10544 ins_pipe( fpu_reg_mem_reg_reg ); 10545 %} 10546 10547 // MACRO3 -- addFPR a mulFPR 10548 // This instruction does not round to 24-bits. It is a '2-address' 10549 // instruction in that the result goes back to src2. This eliminates 10550 // a move from the macro; possibly the register allocator will have 10551 // to add it back (and maybe not). 
10552 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ 10553 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10554 match(Set src2 (AddF (MulF src0 src1) src2)); 10555 10556 format %{ "FLD $src0 ===MACRO3===\n\t" 10557 "FMUL ST,$src1\n\t" 10558 "FADDP $src2,ST" %} 10559 opcode(0xD9); /* LoadF D9 /0 */ 10560 ins_encode( Push_Reg_FPR(src0), 10561 FMul_ST_reg(src1), 10562 FAddP_reg_ST(src2) ); 10563 ins_pipe( fpu_reg_reg_reg ); 10564 %} 10565 10566 // MACRO4 -- divFPR subFPR 10567 // This instruction does not round to 24-bits 10568 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ 10569 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10570 match(Set dst (DivF (SubF src2 src1) src3)); 10571 10572 format %{ "FLD $src2 ===MACRO4===\n\t" 10573 "FSUB ST,$src1\n\t" 10574 "FDIV ST,$src3\n\t" 10575 "FSTP $dst" %} 10576 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10577 ins_encode( Push_Reg_FPR(src2), 10578 subFPR_divFPR_encode(src1,src3), 10579 Pop_Reg_FPR(dst) ); 10580 ins_pipe( fpu_reg_reg_reg_reg ); 10581 %} 10582 10583 // Spill to obtain 24-bit precision 10584 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10585 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10586 match(Set dst (DivF src1 src2)); 10587 10588 format %{ "FDIV $dst,$src1,$src2" %} 10589 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 10590 ins_encode( Push_Reg_FPR(src1), 10591 OpcReg_FPR(src2), 10592 Pop_Mem_FPR(dst) ); 10593 ins_pipe( fpu_mem_reg_reg ); 10594 %} 10595 // 10596 // This instruction does not round to 24-bits 10597 instruct divFPR_reg(regFPR dst, regFPR src) %{ 10598 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10599 match(Set dst (DivF dst src)); 10600 10601 format %{ "FDIV $dst,$src" %} 10602 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10603 ins_encode( Push_Reg_FPR(src), 10604 OpcP, RegOpc(dst) ); 10605 ins_pipe( fpu_reg_reg ); 10606 %} 10607 10608 10609 
// Spill to obtain 24-bit precision 10610 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10611 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10612 match(Set dst (ModF src1 src2)); 10613 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10614 10615 format %{ "FMOD $dst,$src1,$src2" %} 10616 ins_encode( Push_Reg_Mod_DPR(src1, src2), 10617 emitModDPR(), 10618 Push_Result_Mod_DPR(src2), 10619 Pop_Mem_FPR(dst)); 10620 ins_pipe( pipe_slow ); 10621 %} 10622 // 10623 // This instruction does not round to 24-bits 10624 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ 10625 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10626 match(Set dst (ModF dst src)); 10627 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10628 10629 format %{ "FMOD $dst,$src" %} 10630 ins_encode(Push_Reg_Mod_DPR(dst, src), 10631 emitModDPR(), 10632 Push_Result_Mod_DPR(src), 10633 Pop_Reg_FPR(dst)); 10634 ins_pipe( pipe_slow ); 10635 %} 10636 10637 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ 10638 predicate(UseSSE>=1); 10639 match(Set dst (ModF src0 src1)); 10640 effect(KILL rax, KILL cr); 10641 format %{ "SUB ESP,4\t # FMOD\n" 10642 "\tMOVSS [ESP+0],$src1\n" 10643 "\tFLD_S [ESP+0]\n" 10644 "\tMOVSS [ESP+0],$src0\n" 10645 "\tFLD_S [ESP+0]\n" 10646 "loop:\tFPREM\n" 10647 "\tFWAIT\n" 10648 "\tFNSTSW AX\n" 10649 "\tSAHF\n" 10650 "\tJP loop\n" 10651 "\tFSTP_S [ESP+0]\n" 10652 "\tMOVSS $dst,[ESP+0]\n" 10653 "\tADD ESP,4\n" 10654 "\tFSTP ST0\t # Restore FPU Stack" 10655 %} 10656 ins_cost(250); 10657 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); 10658 ins_pipe( pipe_slow ); 10659 %} 10660 10661 10662 //----------Arithmetic Conversion Instructions--------------------------------- 10663 // The conversions operations are all Alpha sorted. Please keep it that way! 
// Round a float held in an x87 register by storing it through a 32-bit
// stack slot: FST_S narrows the 80-bit extended value to single precision.
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Same idiom for doubles: store through a 64-bit stack slot to round.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
// Pure-x87 ConvD2F: expands into the store/reload rounding idiom above.
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// UseSSE==1: source is on the x87 stack, result lands in an XMM register.
// EFLAGS is KILLed because of the SUB/ADD ESP scratch-slot bracketing.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is not already top-of-stack (FPR1), load ST(i-1) and store
    // with a pop; otherwise a plain FST leaves the x87 stack unchanged.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
// Full-SSE path: a single CVTSD2SS performs the narrowing conversion.
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Widen float to double on the x87 stack (body continues in next chunk).
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10728 predicate(UseSSE==0); 10729 match(Set dst (ConvF2D src)); 10730 format %{ "FST_S $dst,$src\t# D-round" %} 10731 ins_encode( Pop_Reg_Reg_DPR(dst, src)); 10732 ins_pipe( fpu_reg_reg ); 10733 %} 10734 10735 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ 10736 predicate(UseSSE==1); 10737 match(Set dst (ConvF2D src)); 10738 format %{ "FST_D $dst,$src\t# D-round" %} 10739 expand %{ 10740 roundDouble_mem_reg(dst,src); 10741 %} 10742 %} 10743 10744 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ 10745 predicate(UseSSE==1); 10746 match(Set dst (ConvF2D src)); 10747 effect( KILL cr ); 10748 format %{ "SUB ESP,4\n\t" 10749 "MOVSS [ESP] $src\n\t" 10750 "FLD_S [ESP]\n\t" 10751 "ADD ESP,4\n\t" 10752 "FSTP $dst\t# D-round" %} 10753 ins_encode %{ 10754 __ subptr(rsp, 4); 10755 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10756 __ fld_s(Address(rsp, 0)); 10757 __ addptr(rsp, 4); 10758 __ fstp_d($dst$$reg); 10759 %} 10760 ins_pipe( pipe_slow ); 10761 %} 10762 10763 instruct convF2D_reg(regD dst, regF src) %{ 10764 predicate(UseSSE>=2); 10765 match(Set dst (ConvF2D src)); 10766 format %{ "CVTSS2SD $dst,$src\t# D-round" %} 10767 ins_encode %{ 10768 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); 10769 %} 10770 ins_pipe( pipe_slow ); 10771 %} 10772 10773 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
10774 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ 10775 predicate(UseSSE<=1); 10776 match(Set dst (ConvD2I src)); 10777 effect( KILL tmp, KILL cr ); 10778 format %{ "FLD $src\t# Convert double to int \n\t" 10779 "FLDCW trunc mode\n\t" 10780 "SUB ESP,4\n\t" 10781 "FISTp [ESP + #0]\n\t" 10782 "FLDCW std/24-bit mode\n\t" 10783 "POP EAX\n\t" 10784 "CMP EAX,0x80000000\n\t" 10785 "JNE,s fast\n\t" 10786 "FLD_D $src\n\t" 10787 "CALL d2i_wrapper\n" 10788 "fast:" %} 10789 ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); 10790 ins_pipe( pipe_slow ); 10791 %} 10792 10793 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10794 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 10795 predicate(UseSSE>=2); 10796 match(Set dst (ConvD2I src)); 10797 effect( KILL tmp, KILL cr ); 10798 format %{ "CVTTSD2SI $dst, $src\n\t" 10799 "CMP $dst,0x80000000\n\t" 10800 "JNE,s fast\n\t" 10801 "SUB ESP, 8\n\t" 10802 "MOVSD [ESP], $src\n\t" 10803 "FLD_D [ESP]\n\t" 10804 "ADD ESP, 8\n\t" 10805 "CALL d2i_wrapper\n" 10806 "fast:" %} 10807 ins_encode %{ 10808 Label fast; 10809 __ cvttsd2sil($dst$$Register, $src$$XMMRegister); 10810 __ cmpl($dst$$Register, 0x80000000); 10811 __ jccb(Assembler::notEqual, fast); 10812 __ subptr(rsp, 8); 10813 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10814 __ fld_d(Address(rsp, 0)); 10815 __ addptr(rsp, 8); 10816 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); 10817 __ bind(fast); 10818 %} 10819 ins_pipe( pipe_slow ); 10820 %} 10821 10822 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ 10823 predicate(UseSSE<=1); 10824 match(Set dst (ConvD2L src)); 10825 effect( KILL cr ); 10826 format %{ "FLD $src\t# Convert double to long\n\t" 10827 "FLDCW trunc mode\n\t" 10828 "SUB ESP,8\n\t" 10829 "FISTp [ESP + #0]\n\t" 10830 "FLDCW std/24-bit mode\n\t" 10831 "POP EAX\n\t" 10832 "POP EDX\n\t" 10833 "CMP 
EDX,0x80000000\n\t" 10834 "JNE,s fast\n\t" 10835 "TEST EAX,EAX\n\t" 10836 "JNE,s fast\n\t" 10837 "FLD $src\n\t" 10838 "CALL d2l_wrapper\n" 10839 "fast:" %} 10840 ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); 10841 ins_pipe( pipe_slow ); 10842 %} 10843 10844 // XMM lacks a float/double->long conversion, so use the old FPU stack. 10845 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 10846 predicate (UseSSE>=2); 10847 match(Set dst (ConvD2L src)); 10848 effect( KILL cr ); 10849 format %{ "SUB ESP,8\t# Convert double to long\n\t" 10850 "MOVSD [ESP],$src\n\t" 10851 "FLD_D [ESP]\n\t" 10852 "FLDCW trunc mode\n\t" 10853 "FISTp [ESP + #0]\n\t" 10854 "FLDCW std/24-bit mode\n\t" 10855 "POP EAX\n\t" 10856 "POP EDX\n\t" 10857 "CMP EDX,0x80000000\n\t" 10858 "JNE,s fast\n\t" 10859 "TEST EAX,EAX\n\t" 10860 "JNE,s fast\n\t" 10861 "SUB ESP,8\n\t" 10862 "MOVSD [ESP],$src\n\t" 10863 "FLD_D [ESP]\n\t" 10864 "ADD ESP,8\n\t" 10865 "CALL d2l_wrapper\n" 10866 "fast:" %} 10867 ins_encode %{ 10868 Label fast; 10869 __ subptr(rsp, 8); 10870 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10871 __ fld_d(Address(rsp, 0)); 10872 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); 10873 __ fistp_d(Address(rsp, 0)); 10874 // Restore the rounding mode, mask the exception 10875 if (Compile::current()->in_24_bit_fp_mode()) { 10876 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 10877 } else { 10878 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 10879 } 10880 // Load the converted long, adjust CPU stack 10881 __ pop(rax); 10882 __ pop(rdx); 10883 __ cmpl(rdx, 0x80000000); 10884 __ jccb(Assembler::notEqual, fast); 10885 __ testl(rax, rax); 10886 __ jccb(Assembler::notEqual, fast); 10887 __ subptr(rsp, 8); 10888 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10889 __ fld_d(Address(rsp, 0)); 10890 __ addptr(rsp, 8); 10891 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); 10892 __ bind(fast); 
10893 %} 10894 ins_pipe( pipe_slow ); 10895 %} 10896 10897 // Convert a double to an int. Java semantics require we do complex 10898 // manglations in the corner cases. So we set the rounding mode to 10899 // 'zero', store the darned double down as an int, and reset the 10900 // rounding mode to 'nearest'. The hardware stores a flag value down 10901 // if we would overflow or converted a NAN; we check for this and 10902 // and go the slow path if needed. 10903 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ 10904 predicate(UseSSE==0); 10905 match(Set dst (ConvF2I src)); 10906 effect( KILL tmp, KILL cr ); 10907 format %{ "FLD $src\t# Convert float to int \n\t" 10908 "FLDCW trunc mode\n\t" 10909 "SUB ESP,4\n\t" 10910 "FISTp [ESP + #0]\n\t" 10911 "FLDCW std/24-bit mode\n\t" 10912 "POP EAX\n\t" 10913 "CMP EAX,0x80000000\n\t" 10914 "JNE,s fast\n\t" 10915 "FLD $src\n\t" 10916 "CALL d2i_wrapper\n" 10917 "fast:" %} 10918 // DPR2I_encoding works for FPR2I 10919 ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); 10920 ins_pipe( pipe_slow ); 10921 %} 10922 10923 // Convert a float in xmm to an int reg. 
10924 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 10925 predicate(UseSSE>=1); 10926 match(Set dst (ConvF2I src)); 10927 effect( KILL tmp, KILL cr ); 10928 format %{ "CVTTSS2SI $dst, $src\n\t" 10929 "CMP $dst,0x80000000\n\t" 10930 "JNE,s fast\n\t" 10931 "SUB ESP, 4\n\t" 10932 "MOVSS [ESP], $src\n\t" 10933 "FLD [ESP]\n\t" 10934 "ADD ESP, 4\n\t" 10935 "CALL d2i_wrapper\n" 10936 "fast:" %} 10937 ins_encode %{ 10938 Label fast; 10939 __ cvttss2sil($dst$$Register, $src$$XMMRegister); 10940 __ cmpl($dst$$Register, 0x80000000); 10941 __ jccb(Assembler::notEqual, fast); 10942 __ subptr(rsp, 4); 10943 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10944 __ fld_s(Address(rsp, 0)); 10945 __ addptr(rsp, 4); 10946 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); 10947 __ bind(fast); 10948 %} 10949 ins_pipe( pipe_slow ); 10950 %} 10951 10952 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ 10953 predicate(UseSSE==0); 10954 match(Set dst (ConvF2L src)); 10955 effect( KILL cr ); 10956 format %{ "FLD $src\t# Convert float to long\n\t" 10957 "FLDCW trunc mode\n\t" 10958 "SUB ESP,8\n\t" 10959 "FISTp [ESP + #0]\n\t" 10960 "FLDCW std/24-bit mode\n\t" 10961 "POP EAX\n\t" 10962 "POP EDX\n\t" 10963 "CMP EDX,0x80000000\n\t" 10964 "JNE,s fast\n\t" 10965 "TEST EAX,EAX\n\t" 10966 "JNE,s fast\n\t" 10967 "FLD $src\n\t" 10968 "CALL d2l_wrapper\n" 10969 "fast:" %} 10970 // DPR2L_encoding works for FPR2L 10971 ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); 10972 ins_pipe( pipe_slow ); 10973 %} 10974 10975 // XMM lacks a float/double->long conversion, so use the old FPU stack. 
10976 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 10977 predicate (UseSSE>=1); 10978 match(Set dst (ConvF2L src)); 10979 effect( KILL cr ); 10980 format %{ "SUB ESP,8\t# Convert float to long\n\t" 10981 "MOVSS [ESP],$src\n\t" 10982 "FLD_S [ESP]\n\t" 10983 "FLDCW trunc mode\n\t" 10984 "FISTp [ESP + #0]\n\t" 10985 "FLDCW std/24-bit mode\n\t" 10986 "POP EAX\n\t" 10987 "POP EDX\n\t" 10988 "CMP EDX,0x80000000\n\t" 10989 "JNE,s fast\n\t" 10990 "TEST EAX,EAX\n\t" 10991 "JNE,s fast\n\t" 10992 "SUB ESP,4\t# Convert float to long\n\t" 10993 "MOVSS [ESP],$src\n\t" 10994 "FLD_S [ESP]\n\t" 10995 "ADD ESP,4\n\t" 10996 "CALL d2l_wrapper\n" 10997 "fast:" %} 10998 ins_encode %{ 10999 Label fast; 11000 __ subptr(rsp, 8); 11001 __ movflt(Address(rsp, 0), $src$$XMMRegister); 11002 __ fld_s(Address(rsp, 0)); 11003 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); 11004 __ fistp_d(Address(rsp, 0)); 11005 // Restore the rounding mode, mask the exception 11006 if (Compile::current()->in_24_bit_fp_mode()) { 11007 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 11008 } else { 11009 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 11010 } 11011 // Load the converted long, adjust CPU stack 11012 __ pop(rax); 11013 __ pop(rdx); 11014 __ cmpl(rdx, 0x80000000); 11015 __ jccb(Assembler::notEqual, fast); 11016 __ testl(rax, rax); 11017 __ jccb(Assembler::notEqual, fast); 11018 __ subptr(rsp, 4); 11019 __ movflt(Address(rsp, 0), $src$$XMMRegister); 11020 __ fld_s(Address(rsp, 0)); 11021 __ addptr(rsp, 4); 11022 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); 11023 __ bind(fast); 11024 %} 11025 ins_pipe( pipe_slow ); 11026 %} 11027 11028 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ 11029 predicate( UseSSE<=1 ); 11030 match(Set dst (ConvI2D src)); 11031 format %{ "FILD $src\n\t" 11032 "FSTP $dst" %} 11033 opcode(0xDB, 0x0); /* DB /0 */ 11034 ins_encode(Push_Mem_I(src), 
Pop_Reg_DPR(dst)); 11035 ins_pipe( fpu_reg_mem ); 11036 %} 11037 11038 instruct convI2D_reg(regD dst, rRegI src) %{ 11039 predicate( UseSSE>=2 && !UseXmmI2D ); 11040 match(Set dst (ConvI2D src)); 11041 format %{ "CVTSI2SD $dst,$src" %} 11042 ins_encode %{ 11043 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); 11044 %} 11045 ins_pipe( pipe_slow ); 11046 %} 11047 11048 instruct convI2D_mem(regD dst, memory mem) %{ 11049 predicate( UseSSE>=2 ); 11050 match(Set dst (ConvI2D (LoadI mem))); 11051 format %{ "CVTSI2SD $dst,$mem" %} 11052 ins_encode %{ 11053 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); 11054 %} 11055 ins_pipe( pipe_slow ); 11056 %} 11057 11058 instruct convXI2D_reg(regD dst, rRegI src) 11059 %{ 11060 predicate( UseSSE>=2 && UseXmmI2D ); 11061 match(Set dst (ConvI2D src)); 11062 11063 format %{ "MOVD $dst,$src\n\t" 11064 "CVTDQ2PD $dst,$dst\t# i2d" %} 11065 ins_encode %{ 11066 __ movdl($dst$$XMMRegister, $src$$Register); 11067 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); 11068 %} 11069 ins_pipe(pipe_slow); // XXX 11070 %} 11071 11072 instruct convI2DPR_mem(regDPR dst, memory mem) %{ 11073 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 11074 match(Set dst (ConvI2D (LoadI mem))); 11075 format %{ "FILD $mem\n\t" 11076 "FSTP $dst" %} 11077 opcode(0xDB); /* DB /0 */ 11078 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11079 Pop_Reg_DPR(dst)); 11080 ins_pipe( fpu_reg_mem ); 11081 %} 11082 11083 // Convert a byte to a float; no rounding step needed. 
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  // Matches only ConvI2F of (AndI x 255): the value fits in a byte, so it is
  // exactly representable in float and needs no 24-bit rounding step.
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// UseXmmI2F path: move the int into XMM, then convert with CVTDQ2PS.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy into both halves, then arithmetic-shift the
// high word to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Convert long to double on the x87 stack: push both halves and FILD the
// resulting 64-bit memory operand; pre-SSE2 path.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to double via x87 FILD/FSTP through the stack, landing the
// result in an XMM register (SSE2).
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to float via x87, landing the result in an XMM register.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Convert long to float on the x87 stack, result to a float stack slot.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Truncate long to int: just copy the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Reinterpret float bits (already spilled to a stack slot) as an int.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Reinterpret x87 float bits by storing to an int stack slot (no SSE).
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Reinterpret XMM float bits by storing to an int stack slot.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret XMM float bits directly into a GPR (no memory round-trip).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret int bits as a float by storing into a float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// Reinterpret int bits (in a stack slot) as a float on the x87 stack.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);  /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Reinterpret int bits (in a stack slot) as a float in XMM.
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret int bits directly from a GPR into XMM.
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret double bits (in a stack slot) as a long in a register pair.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// Reinterpret x87 double bits by storing to a long stack slot.
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Reinterpret XMM double bits by storing to a long stack slot.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret XMM double bits into a register pair without touching memory:
// MOVD the low word, shuffle the high word down, MOVD it out.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret long bits (register pair) as a double by storing both halves
// into a double stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// Reinterpret long bits (in a stack slot) as a double on the x87 stack.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);  /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// Reinterpret long bits (in a stack slot) as a double in XMM.
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Variant used when UseXmmLoadAndClearUpper is off; the format advertises
// MOVLPD — presumably movdbl() selects MOVLPD vs MOVSD from that flag
// (NOTE(review): confirm against MacroAssembler::movdbl).
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret a long register pair as a double in XMM without memory:
// MOVD each half into XMM and interleave with PUNPCKLDQ.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// Variant for arrays the node already knows are large: goes straight to the
// REP STOS path.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, both strings byte[] (Latin-1).
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, both strings char[] (UTF-16).
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, first string byte[], second char[].
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, first string char[], second byte[].  Note the operands are
// swapped in the call (str2/cnt2 first) and the register assignment mirrors
// the other variants' swap.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}

  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf with a runtime substring length (cnt2 in a register; -1 tells the
// stub the length is not a compile-time constant).  byte[]/byte[] variant.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// Runtime-length IndexOf, char[]/char[] variant.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Runtime-length IndexOf, char[] haystack / byte[] needle variant.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf of a single (char) value within a char[] string.
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// char[] flavor: same stub with the is_char flag set.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// Test whether a byte[] range contains any negative (high-bit-set) bytes.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Compare register against an immediate.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero: TEST of the register against itself sets the same
// flags as a CMP with zero.
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fused (AndI src con) compared to zero: a single TEST instruction.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fused (AndI src mem) compared to zero.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
// Unsigned compare: identical encoding to the signed CMP, but produces an
// eFlagsRegU so only unsigned branch conditions may consume the result.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare against zero.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
// The predicate restricts this to loads with no relocation info.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// TEST mem,0xFFFFFFFF: ANDs the loaded pointer with all-ones so ZF is set
// iff the pointer is null, without needing a register.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
// Computes: limit = init + stride * ((limit - init + stride - 1) / stride)
// using a 64-bit intermediate in EAX:EDX so the subtraction cannot overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    int m1 = (strd > 0) ? 1 : -1; // NOTE(review): m1 appears unused below -- candidate for removal
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// mask version
// Jump Direct Conditional - Label defines a relative address
// These "_and_restoreMask" variants are selected when the loop body set the
// AVX-512 vector mask (n->has_vector_mask_set()); the mask must be restored
// after the back-branch.
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Float-compare branch that must also inspect the parity flag (unordered
// result). EQ must NOT be taken when unordered; NE must be taken.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // Unordered (PF set) or not-equal: both take the branch.
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Unordered must skip the equal-branch.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant used when the result is only compared against null: the flags
// produced by the scan are consumed directly, so EDI need not be zeroed.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short variant of the parity-aware float branch (see jmpConUCF2).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Produces -1, 0, or +1 in dst: compare high halves signed, then low halves
// unsigned (correct for two's-complement 64-bit values held as two 32-bit regs).
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Against zero only the sign of the high word matters, so a TEST suffices.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): in the four FP predicates below '&&' binds tighter than '||',
// so the UseSSE guard applies only to the BoolTest::lt arm -- confirm whether
// parentheses around the (lt || ge) disjunction were intended.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// ZF is set iff (src.lo | src.hi) == 0, i.e. the whole long is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
12844 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{ 12845 match(Set flags (CmpUL src zero)); 12846 effect(TEMP tmp); 12847 ins_cost(200); 12848 format %{ "MOV $tmp,$src.lo\n\t" 12849 "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %} 12850 ins_encode(long_cmp_flags0(src, tmp)); 12851 ins_pipe(ialu_reg_reg_long); 12852 %} 12853 12854 // Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares. 12855 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{ 12856 match(Set flags (CmpUL src1 src2)); 12857 ins_cost(200+300); 12858 format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t" 12859 "JNE,s skip\n\t" 12860 "CMP $src1.hi,$src2.hi\n\t" 12861 "skip:\t" %} 12862 ins_encode(long_cmp_flags1(src1, src2)); 12863 ins_pipe(ialu_cr_reg_reg); 12864 %} 12865 12866 // Unsigned long compare reg == zero/reg OR reg != zero/reg 12867 // Just a wrapper for a normal branch, plus the predicate test. 12868 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{ 12869 match(If cmp flags); 12870 effect(USE labl); 12871 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); 12872 expand %{ 12873 jmpCon(cmp, flags, labl); // JEQ or JNE... 12874 %} 12875 %} 12876 12877 // Compare 2 longs and CMOVE longs. 
12878 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{ 12879 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 12880 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12881 ins_cost(400); 12882 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12883 "CMOV$cmp $dst.hi,$src.hi" %} 12884 opcode(0x0F,0x40); 12885 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 12886 ins_pipe( pipe_cmov_reg_long ); 12887 %} 12888 12889 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{ 12890 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 12891 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12892 ins_cost(500); 12893 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12894 "CMOV$cmp $dst.hi,$src.hi" %} 12895 opcode(0x0F,0x40); 12896 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 12897 ins_pipe( pipe_cmov_reg_long ); 12898 %} 12899 12900 // Compare 2 longs and CMOVE ints. 
12901 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{ 12902 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12903 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 12904 ins_cost(200); 12905 format %{ "CMOV$cmp $dst,$src" %} 12906 opcode(0x0F,0x40); 12907 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12908 ins_pipe( pipe_cmov_reg ); 12909 %} 12910 12911 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{ 12912 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12913 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 12914 ins_cost(250); 12915 format %{ "CMOV$cmp $dst,$src" %} 12916 opcode(0x0F,0x40); 12917 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 12918 ins_pipe( pipe_cmov_mem ); 12919 %} 12920 12921 // Compare 2 longs and CMOVE ints. 
// Conditional move of a pointer, keyed off a long EQ/NE compare.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 stack registers)
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  // FIX: '&&' binds tighter than '||' in the C++ ADLC emits, so the
  // unparenthesized form parsed as (UseSSE<=1 && eq) || ne, allowing an NE
  // test to select this x87 rule regardless of UseSSE.  Parenthesize the
  // BoolTest disjunction, matching the cmov rules above.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM registers)
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // FIX: parenthesized the '||' (operator-precedence), as above.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  // FIX: parenthesized the '||' (operator-precedence), as above.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // FIX: parenthesized the '||' (operator-precedence), as above.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
12999 // Just a wrapper for a normal branch, plus the predicate test 13000 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{ 13001 match(If cmp flags); 13002 effect(USE labl); 13003 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); 13004 ins_cost(300); 13005 expand %{ 13006 jmpCon(cmp,flags,labl); // JGT or JLE... 13007 %} 13008 %} 13009 13010 //====== 13011 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares. 13012 // Same as cmpUL_reg_flags_LEGT except must negate src 13013 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{ 13014 match(Set flags (CmpUL src zero)); 13015 effect(TEMP tmp); 13016 ins_cost(300); 13017 format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t" 13018 "CMP $tmp,$src.lo\n\t" 13019 "SBB $tmp,$src.hi\n\t" %} 13020 ins_encode(long_cmp_flags3(src, tmp)); 13021 ins_pipe(ialu_reg_reg_long); 13022 %} 13023 13024 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares. 13025 // Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands 13026 // requires a commuted test to get the same result. 13027 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{ 13028 match(Set flags (CmpUL src1 src2)); 13029 effect(TEMP tmp); 13030 ins_cost(300); 13031 format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t" 13032 "MOV $tmp,$src2.hi\n\t" 13033 "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %} 13034 ins_encode(long_cmp_flags2( src2, src1, tmp)); 13035 ins_pipe(ialu_cr_reg_reg); 13036 %} 13037 13038 // Unsigned long compares reg < zero/req OR reg >= zero/req. 
13039 // Just a wrapper for a normal branch, plus the predicate test 13040 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{ 13041 match(If cmp flags); 13042 effect(USE labl); 13043 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le); 13044 ins_cost(300); 13045 expand %{ 13046 jmpCon(cmp, flags, labl); // JGT or JLE... 13047 %} 13048 %} 13049 13050 // Compare 2 longs and CMOVE longs. 13051 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{ 13052 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 13053 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 13054 ins_cost(400); 13055 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13056 "CMOV$cmp $dst.hi,$src.hi" %} 13057 opcode(0x0F,0x40); 13058 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 13059 ins_pipe( pipe_cmov_reg_long ); 13060 %} 13061 13062 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{ 13063 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 13064 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 13065 ins_cost(500); 13066 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13067 "CMOV$cmp $dst.hi,$src.hi+4" %} 13068 opcode(0x0F,0x40); 13069 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 13070 ins_pipe( pipe_cmov_reg_long ); 13071 %} 13072 13073 // Compare 2 longs and CMOVE ints. 
13074 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{ 13075 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 13076 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 13077 ins_cost(200); 13078 format %{ "CMOV$cmp $dst,$src" %} 13079 opcode(0x0F,0x40); 13080 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 13081 ins_pipe( pipe_cmov_reg ); 13082 %} 13083 13084 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{ 13085 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 13086 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 13087 ins_cost(250); 13088 format %{ "CMOV$cmp $dst,$src" %} 13089 opcode(0x0F,0x40); 13090 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 13091 ins_pipe( pipe_cmov_mem ); 13092 %} 13093 13094 // Compare 2 longs and CMOVE ptrs. 
// Conditional move of a pointer, keyed off a commuted long LE/GT compare.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 stack registers)
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  // FIX: '&&' binds tighter than '||' in the C++ ADLC emits, so the
  // unparenthesized form parsed as (UseSSE<=1 && le) || gt, allowing a GT
  // test to select this x87 rule regardless of UseSSE.  Parenthesize the
  // BoolTest disjunction, matching the cmov rules above.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM registers)
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // FIX: parenthesized the '||' (operator-precedence), as above.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  // FIX: parenthesized the '||' (operator-precedence), as above.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // FIX: parenthesized the '||' (operator-precedence), as above.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
13168 instruct CallDynamicJavaDirect(method meth) %{ 13169 match(CallDynamicJava); 13170 effect(USE meth); 13171 13172 ins_cost(300); 13173 format %{ "MOV EAX,(oop)-1\n\t" 13174 "CALL,dynamic" %} 13175 opcode(0xE8); /* E8 cd */ 13176 ins_encode( pre_call_resets, 13177 Java_Dynamic_Call( meth ), 13178 call_epilog, 13179 post_call_FPU ); 13180 ins_pipe( pipe_slow ); 13181 ins_alignment(4); 13182 %} 13183 13184 // Call Runtime Instruction 13185 instruct CallRuntimeDirect(method meth) %{ 13186 match(CallRuntime ); 13187 effect(USE meth); 13188 13189 ins_cost(300); 13190 format %{ "CALL,runtime " %} 13191 opcode(0xE8); /* E8 cd */ 13192 // Use FFREEs to clear entries in float stack 13193 ins_encode( pre_call_resets, 13194 FFree_Float_Stack_All, 13195 Java_To_Runtime( meth ), 13196 post_call_FPU ); 13197 ins_pipe( pipe_slow ); 13198 %} 13199 13200 // Call runtime without safepoint 13201 instruct CallLeafDirect(method meth) %{ 13202 match(CallLeaf); 13203 effect(USE meth); 13204 13205 ins_cost(300); 13206 format %{ "CALL_LEAF,runtime " %} 13207 opcode(0xE8); /* E8 cd */ 13208 ins_encode( pre_call_resets, 13209 FFree_Float_Stack_All, 13210 Java_To_Runtime( meth ), 13211 Verify_FPU_For_Leaf, post_call_FPU ); 13212 ins_pipe( pipe_slow ); 13213 %} 13214 13215 instruct CallLeafNoFPDirect(method meth) %{ 13216 match(CallLeafNoFP); 13217 effect(USE meth); 13218 13219 ins_cost(300); 13220 format %{ "CALL_LEAF_NOFP,runtime " %} 13221 opcode(0xE8); /* E8 cd */ 13222 ins_encode(pre_call_resets, Java_To_Runtime(meth)); 13223 ins_pipe( pipe_slow ); 13224 %} 13225 13226 13227 // Return Instruction 13228 // Remove the return address & jump to it. 13229 instruct Ret() %{ 13230 match(Return); 13231 format %{ "RET" %} 13232 opcode(0xC3); 13233 ins_encode(OpcP); 13234 ins_pipe( pipe_jmp ); 13235 %} 13236 13237 // Tail Call; Jump from runtime stub to Java code. 13238 // Also known as an 'interprocedural jump'. 13239 // Target of jump will eventually return to caller. 
13240 // TailJump below removes the return address. 13241 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{ 13242 match(TailCall jump_target method_oop ); 13243 ins_cost(300); 13244 format %{ "JMP $jump_target \t# EBX holds method oop" %} 13245 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13246 ins_encode( OpcP, RegOpc(jump_target) ); 13247 ins_pipe( pipe_jmp ); 13248 %} 13249 13250 13251 // Tail Jump; remove the return address; jump to target. 13252 // TailCall above leaves the return address around. 13253 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{ 13254 match( TailJump jump_target ex_oop ); 13255 ins_cost(300); 13256 format %{ "POP EDX\t# pop return address into dummy\n\t" 13257 "JMP $jump_target " %} 13258 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13259 ins_encode( enc_pop_rdx, 13260 OpcP, RegOpc(jump_target) ); 13261 ins_pipe( pipe_jmp ); 13262 %} 13263 13264 // Create exception oop: created by stack-crawling runtime code. 13265 // Created exception is now available to this handler, and is setup 13266 // just prior to jumping to this handler. No code emitted. 13267 instruct CreateException( eAXRegP ex_oop ) 13268 %{ 13269 match(Set ex_oop (CreateEx)); 13270 13271 size(0); 13272 // use the following format syntax 13273 format %{ "# exception oop is in EAX; no code emitted" %} 13274 ins_encode(); 13275 ins_pipe( empty ); 13276 %} 13277 13278 13279 // Rethrow exception: 13280 // The exception oop will come in the first argument position. 13281 // Then JUMP (not call) to the rethrow stub code. 
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast lock using RTM (restricted transactional memory) when enabled.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Fast lock without RTM (the common path).
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active rule: a load that reads back a value just stored to the same
// address is replaced by re-issuing the store (the value is already live).
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.