//
// Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

// The 4th column is the hardware register number used in instruction
// encodings (EAX=0, ECX=1, EDX=2, EBX=3, ESP=4, EBP=5, ESI=6, EDI=7).
reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
// Each x87 stack slot is described as a Low/High pair of 32-bit halves;
// an L/H pair with the same encoding together holds one 64-bit double.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to:   return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
// Relocation formats used when emitting 32-bit immediates/displacements.
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Rounds 'adr' down to a 16-byte boundary and stores the 128-bit value
// {lo, hi} there; returns the aligned address. The caller must supply a
// buffer with at least 16 bytes of slack (see fp_signmask_pool below).
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool entry starts one slot past the previous so that rounding down
// to 16 bytes never aliases two masks.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted before a call for FPU-mode / AVX-state resets;
// the ret_addr_offset() and compute_padding() functions below must account
// for these extra bytes.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All sequence; set when it is first emitted,
// -1 until then (checked by the assert below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return SafepointMechanism::uses_thread_local_poll();
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1;                      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5;                      // skip MOV instruction
  current_offset += 1;                      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModRM byte: mode (f1) in bits 7-6, reg/opcode-extension (f2) in
// bits 5-3, r/m (f3) in bits 2-0.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a condition code (f2) into a base
// opcode (f1), e.g. for Jcc/SETcc families.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Debug-only sanity check: an oop-typed reloc must carry a real,
  // non-scavengable oop (or 0 / the non-oop sentinel word).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)) && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + [ESP+disp] addressing (ModRM, SIB, then an 8- or 32-bit
// displacement depending on the size of disp).
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );                 // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);      // Displacement // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);      // Displacement // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModRM/SIB/displacement bytes for a register+memory operand.
// index == 0x4 means "no index"; base == -1 means an absolute 32-bit
// address (ModRM r/m = 0x5 with mode 0).
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}


// Emit a register-to-register MOV (0x8B), or nothing when source and
// destination encodings are the same.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// After a comiss/ucomiss, rewrite the flags for the NaN (parity) case so
// that an unordered compare reads as 'less than' to subsequent branches.
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst':
// -1 if unordered (parity) or below, 0 if equal, 1 otherwise.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog for -XX:+PrintAssembly / debug output; must
// mirror the code emitted by MachPrologNode::emit below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  // verified_entry() emits the whole prolog: stack bang, EBP save, frame
  // allocation and (optionally) the 24-bit FPU control word load.
  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog; must mirror MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize (32-bit immediate)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize (8-bit immediate)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // pop EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    if (SafepointMechanism::uses_thread_local_poll()) {
      // Thread-local poll: load the poll page address out of the current
      // thread and touch it.
      Register pollReg = as_Register(EBX_enc);
      MacroAssembler masm(&cbuf);
      masm.get_thread(pollReg);
      masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
      masm.relocate(relocInfo::poll_return_type);
      masm.testl(rax, Address(pollReg, 0));
    } else {
      // Global poll page at an absolute address.
      cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
      emit_opcode(cbuf,0x85);
      emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
      emit_d32(cbuf, (intptr_t)os::get_polling_page());
    }
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Register classes used by the spill-copy helpers below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (or just format / size) a single load/store between a register and
// an [ESP + offset] stack slot. Returns the accumulated instruction size.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else {                                 // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
// Spill/reload between an XMM register and an [ESP + offset] stack slot.
// An adjacent register pair (reg_lo+1 == reg_hi) is a double and uses
// MOVSD/MOVLPD; a single register is a float and uses MOVSS. Returns the
// accumulated size, accounting for VEX/EVEX prefixes and for the EVEX
// compressed (disp8*N) displacement when AVX-512 is in use.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                              Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                      offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                              Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                      offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    // AVX-512 disp8*N compression can shrink a multi-byte displacement to one byte.
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy: MOVSD/MOVAPD for an adjacent (double) pair,
// MOVSS/MOVAPS for a single float. Returns the accumulated size.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Copy a 32-bit value from a general-purpose register into an XMM register
// (MOVD xmm, r32). Returns the instruction size in bytes.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// Copy a 32-bit value from an XMM register into a general-purpose register
// (MOVD r32, xmm). Returns the instruction size in bytes.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register copy (MOV r32, r32; opcode 0x8B + ModRM).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 FPU register to an [ESP + offset] stack slot. If the source
// is not already on top of the FP stack it is pushed first (FLD) and stored
// with a popping FSTP; otherwise a non-popping FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // EBX_num (3) / EDX_num (2) are reused here purely as the ModRM 'reg'
  // field: /3 selects the popping FSTP form, /2 the non-popping FST form.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
956 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 957 int src_hi, int dst_hi, uint ireg, outputStream* st); 958 959 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 960 int stack_offset, int reg, uint ireg, outputStream* st); 961 962 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 963 int dst_offset, uint ireg, outputStream* st) { 964 int calc_size = 0; 965 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 966 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 967 switch (ireg) { 968 case Op_VecS: 969 calc_size = 3+src_offset_size + 3+dst_offset_size; 970 break; 971 case Op_VecD: { 972 calc_size = 3+src_offset_size + 3+dst_offset_size; 973 int tmp_src_offset = src_offset + 4; 974 int tmp_dst_offset = dst_offset + 4; 975 src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4); 976 dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 
1 : 4); 977 calc_size += 3+src_offset_size + 3+dst_offset_size; 978 break; 979 } 980 case Op_VecX: 981 case Op_VecY: 982 case Op_VecZ: 983 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 984 break; 985 default: 986 ShouldNotReachHere(); 987 } 988 if (cbuf) { 989 MacroAssembler _masm(cbuf); 990 int offset = __ offset(); 991 switch (ireg) { 992 case Op_VecS: 993 __ pushl(Address(rsp, src_offset)); 994 __ popl (Address(rsp, dst_offset)); 995 break; 996 case Op_VecD: 997 __ pushl(Address(rsp, src_offset)); 998 __ popl (Address(rsp, dst_offset)); 999 __ pushl(Address(rsp, src_offset+4)); 1000 __ popl (Address(rsp, dst_offset+4)); 1001 break; 1002 case Op_VecX: 1003 __ movdqu(Address(rsp, -16), xmm0); 1004 __ movdqu(xmm0, Address(rsp, src_offset)); 1005 __ movdqu(Address(rsp, dst_offset), xmm0); 1006 __ movdqu(xmm0, Address(rsp, -16)); 1007 break; 1008 case Op_VecY: 1009 __ vmovdqu(Address(rsp, -32), xmm0); 1010 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1011 __ vmovdqu(Address(rsp, dst_offset), xmm0); 1012 __ vmovdqu(xmm0, Address(rsp, -32)); 1013 break; 1014 case Op_VecZ: 1015 __ evmovdquq(Address(rsp, -64), xmm0, 2); 1016 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 1017 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 1018 __ evmovdquq(xmm0, Address(rsp, -64), 2); 1019 break; 1020 default: 1021 ShouldNotReachHere(); 1022 } 1023 int size = __ offset() - offset; 1024 assert(size == calc_size, "incorrect size calculation"); 1025 return size; 1026 #ifndef PRODUCT 1027 } else if (!do_size) { 1028 switch (ireg) { 1029 case Op_VecS: 1030 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1031 "popl [rsp + #%d]", 1032 src_offset, dst_offset); 1033 break; 1034 case Op_VecD: 1035 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1036 "popq [rsp + #%d]\n\t" 1037 "pushl [rsp + #%d]\n\t" 1038 "popq [rsp + #%d]", 1039 src_offset, dst_offset, src_offset+4, dst_offset+4); 1040 break; 1041 case Op_VecX: 1042 st->print("movdqu [rsp - #16], xmm0\t# 
128-bit mem-mem spill\n\t" 1043 "movdqu xmm0, [rsp + #%d]\n\t" 1044 "movdqu [rsp + #%d], xmm0\n\t" 1045 "movdqu xmm0, [rsp - #16]", 1046 src_offset, dst_offset); 1047 break; 1048 case Op_VecY: 1049 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1050 "vmovdqu xmm0, [rsp + #%d]\n\t" 1051 "vmovdqu [rsp + #%d], xmm0\n\t" 1052 "vmovdqu xmm0, [rsp - #32]", 1053 src_offset, dst_offset); 1054 break; 1055 case Op_VecZ: 1056 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1057 "vmovdqu xmm0, [rsp + #%d]\n\t" 1058 "vmovdqu [rsp + #%d], xmm0\n\t" 1059 "vmovdqu xmm0, [rsp - #64]", 1060 src_offset, dst_offset); 1061 break; 1062 default: 1063 ShouldNotReachHere(); 1064 } 1065 #endif 1066 } 1067 return calc_size; 1068 } 1069 1070 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1071 // Get registers to move 1072 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1073 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1074 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1075 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1076 1077 enum RC src_second_rc = rc_class(src_second); 1078 enum RC src_first_rc = rc_class(src_first); 1079 enum RC dst_second_rc = rc_class(dst_second); 1080 enum RC dst_first_rc = rc_class(dst_first); 1081 1082 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1083 1084 // Generate spill code! 
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // Vector spill copies are handled entirely by the shared vec_* helpers.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high word first so the low-word move does not clobber it.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // x87 -> temp stack slot (offset 0 of the freshly reserved 8 bytes)
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
// Print the spill copy's assembly text (no emission, no sizing).
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

// Emit the spill copy's machine code into the code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

// Compute the spill copy's size in bytes without emitting anything.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Materialize the address of the lock's stack slot: LEA reg,[ESP+offset],
// using an 8-bit displacement when it fits (offset < 128), else 32-bit.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg,
    0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// LEA with 32-bit displacement is 7 bytes; with 8-bit displacement, 4 bytes.
// Must match the encoding choice in BoxLockNode::emit above.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified Entry Point: compare the receiver's klass (in ECX) against the
// expected klass (in EAX); on mismatch jump to the inline-cache miss stub.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
     nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Fixed size: cmp (3) + jcc (6) + nops; one fewer nop when OptoBreakpoint
// reserves a byte for int3.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}

// This is UltraSparc specific, true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 1; }

// No CMOVF/CMOVD with SSE/SSE2
const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_oop_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_klass_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Rewrite the memory operand of 'node' (which may fault at a NULL base on
// Win95-era OSes) into its *_win95_safe equivalent so the implicit null
// check is still recognized. Walks the node's operands to find the one
// addressed by input edge 'idx'.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                      // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
// (not used on 32-bit x86: long division goes through a runtime call)
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// True for (AndL x con) with a constant whose high word is zero, and for
// a ConL constant with a zero high word.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.  MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (selects 16-bit operands)
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32, imm32 with imm32 == 0 (clears a register without touching flags? no:
  // NOTE(review): unlike XOR, MOV does not modify flags — presumably chosen for that reason)
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1720 // Check for 8-bit immediate, and set sign extend bit in opcode 1721 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1722 emit_opcode(cbuf, $primary | 0x02); } 1723 else { // If 32-bit immediate 1724 emit_opcode(cbuf, $primary); 1725 } 1726 // Emit r/m byte with secondary opcode, after primary opcode. 1727 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1728 %} 1729 1730 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1731 // Check for 8-bit immediate, and set sign extend bit in opcode 1732 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1733 $$$emit8$imm$$constant; 1734 } 1735 else { // If 32-bit immediate 1736 // Output immediate 1737 $$$emit32$imm$$constant; 1738 } 1739 %} 1740 1741 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1742 // Emit primary opcode and set sign-extend bit 1743 // Check for 8-bit immediate, and set sign extend bit in opcode 1744 int con = (int)$imm$$constant; // Throw away top bits 1745 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1746 // Emit r/m byte with secondary opcode, after primary opcode. 1747 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1748 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1749 else emit_d32(cbuf,con); 1750 %} 1751 1752 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1753 // Emit primary opcode and set sign-extend bit 1754 // Check for 8-bit immediate, and set sign extend bit in opcode 1755 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1756 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1757 // Emit r/m byte with tertiary opcode, after primary opcode. 
1758 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1759 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1760 else emit_d32(cbuf,con); 1761 %} 1762 1763 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1764 emit_cc(cbuf, $secondary, $dst$$reg ); 1765 %} 1766 1767 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1768 int destlo = $dst$$reg; 1769 int desthi = HIGH_FROM_LOW(destlo); 1770 // bswap lo 1771 emit_opcode(cbuf, 0x0F); 1772 emit_cc(cbuf, 0xC8, destlo); 1773 // bswap hi 1774 emit_opcode(cbuf, 0x0F); 1775 emit_cc(cbuf, 0xC8, desthi); 1776 // xchg lo and hi 1777 emit_opcode(cbuf, 0x87); 1778 emit_rm(cbuf, 0x3, destlo, desthi); 1779 %} 1780 1781 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1782 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1783 %} 1784 1785 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1786 $$$emit8$primary; 1787 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1788 %} 1789 1790 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1791 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1792 emit_d8(cbuf, op >> 8 ); 1793 emit_d8(cbuf, op & 255); 1794 %} 1795 1796 // emulate a CMOV with a conditional branch around a MOV 1797 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1798 // Invert sense of branch from sense of CMOV 1799 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1800 emit_d8( cbuf, $brOffs$$constant ); 1801 %} 1802 1803 enc_class enc_PartialSubtypeCheck( ) %{ 1804 Register Redi = as_Register(EDI_enc); // result register 1805 Register Reax = as_Register(EAX_enc); // super class 1806 Register Recx = as_Register(ECX_enc); // killed 1807 Register Resi = as_Register(ESI_enc); // sub class 1808 Label miss; 1809 1810 MacroAssembler _masm(&cbuf); 1811 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1812 NULL, &miss, 1813 /*set_cond_codes:*/ true); 1814 if ($primary) { 1815 __ xorptr(Redi, Redi); 1816 } 1817 __ bind(miss); 1818 %} 1819 1820 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1821 
MacroAssembler masm(&cbuf); 1822 int start = masm.offset(); 1823 if (UseSSE >= 2) { 1824 if (VerifyFPU) { 1825 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1826 } 1827 } else { 1828 // External c_calling_convention expects the FPU stack to be 'clean'. 1829 // Compiled code leaves it dirty. Do cleanup now. 1830 masm.empty_FPU_stack(); 1831 } 1832 if (sizeof_FFree_Float_Stack_All == -1) { 1833 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1834 } else { 1835 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1836 } 1837 %} 1838 1839 enc_class Verify_FPU_For_Leaf %{ 1840 if( VerifyFPU ) { 1841 MacroAssembler masm(&cbuf); 1842 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1843 } 1844 %} 1845 1846 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1847 // This is the instruction starting address for relocation info. 1848 cbuf.set_insts_mark(); 1849 $$$emit8$primary; 1850 // CALL directly to the runtime 1851 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1852 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1853 1854 if (UseSSE >= 2) { 1855 MacroAssembler _masm(&cbuf); 1856 BasicType rt = tf()->return_type(); 1857 1858 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1859 // A C runtime call where the return value is unused. In SSE2+ 1860 // mode the result needs to be removed from the FPU stack. It's 1861 // likely that this function call could be removed by the 1862 // optimizer if the C function is a pure function. 
1863 __ ffree(0); 1864 } else if (rt == T_FLOAT) { 1865 __ lea(rsp, Address(rsp, -4)); 1866 __ fstp_s(Address(rsp, 0)); 1867 __ movflt(xmm0, Address(rsp, 0)); 1868 __ lea(rsp, Address(rsp, 4)); 1869 } else if (rt == T_DOUBLE) { 1870 __ lea(rsp, Address(rsp, -8)); 1871 __ fstp_d(Address(rsp, 0)); 1872 __ movdbl(xmm0, Address(rsp, 0)); 1873 __ lea(rsp, Address(rsp, 8)); 1874 } 1875 } 1876 %} 1877 1878 enc_class pre_call_resets %{ 1879 // If method sets FPU control word restore it here 1880 debug_only(int off0 = cbuf.insts_size()); 1881 if (ra_->C->in_24_bit_fp_mode()) { 1882 MacroAssembler _masm(&cbuf); 1883 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1884 } 1885 // Clear upper bits of YMM registers when current compiled code uses 1886 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1887 MacroAssembler _masm(&cbuf); 1888 __ vzeroupper(); 1889 debug_only(int off1 = cbuf.insts_size()); 1890 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1891 %} 1892 1893 enc_class post_call_FPU %{ 1894 // If method sets FPU control word do it here also 1895 if (Compile::current()->in_24_bit_fp_mode()) { 1896 MacroAssembler masm(&cbuf); 1897 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1898 } 1899 %} 1900 1901 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1902 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1903 // who we intended to call. 1904 cbuf.set_insts_mark(); 1905 $$$emit8$primary; 1906 1907 if (!_method) { 1908 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1909 runtime_call_Relocation::spec(), 1910 RELOC_IMM32); 1911 } else { 1912 int method_index = resolved_method_index(cbuf); 1913 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1914 : static_call_Relocation::spec(method_index); 1915 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1916 rspec, RELOC_DISP32); 1917 // Emit stubs for static call. 1918 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1919 if (stub == NULL) { 1920 ciEnv::current()->record_failure("CodeCache is full"); 1921 return; 1922 } 1923 } 1924 %} 1925 1926 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1927 MacroAssembler _masm(&cbuf); 1928 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1929 %} 1930 1931 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1932 int disp = in_bytes(Method::from_compiled_offset()); 1933 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1934 1935 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1936 cbuf.set_insts_mark(); 1937 $$$emit8$primary; 1938 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1939 emit_d8(cbuf, disp); // Displacement 1940 1941 %} 1942 1943 // Following encoding is no longer used, but may be restored if calling 1944 // convention changes significantly. 
1945 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1946 // 1947 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1948 // // int ic_reg = Matcher::inline_cache_reg(); 1949 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1950 // // int imo_reg = Matcher::interpreter_method_oop_reg(); 1951 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1952 // 1953 // // // Interpreter expects method_oop in EBX, currently a callee-saved register, 1954 // // // so we load it immediately before the call 1955 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop 1956 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1957 // 1958 // // xor rbp,ebp 1959 // emit_opcode(cbuf, 0x33); 1960 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1961 // 1962 // // CALL to interpreter. 1963 // cbuf.set_insts_mark(); 1964 // $$$emit8$primary; 1965 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1966 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1967 // %} 1968 1969 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1970 $$$emit8$primary; 1971 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1972 $$$emit8$shift$$constant; 1973 %} 1974 1975 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1976 // Load immediate does not have a zero or sign extended version 1977 // for 8-bit immediates 1978 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1979 $$$emit32$src$$constant; 1980 %} 1981 1982 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1983 // Load immediate does not have a zero or sign extended version 1984 // for 8-bit immediates 1985 emit_opcode(cbuf, $primary + $dst$$reg); 1986 $$$emit32$src$$constant; 1987 %} 1988 1989 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1990 // Load immediate does not have a zero or sign extended version 1991 // for 8-bit immediates 1992 int dst_enc = $dst$$reg; 1993 int src_con = $src$$constant & 0x0FFFFFFFFL; 1994 if (src_con == 0) { 1995 // xor dst, dst 
1996 emit_opcode(cbuf, 0x33); 1997 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1998 } else { 1999 emit_opcode(cbuf, $primary + dst_enc); 2000 emit_d32(cbuf, src_con); 2001 } 2002 %} 2003 2004 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 2005 // Load immediate does not have a zero or sign extended version 2006 // for 8-bit immediates 2007 int dst_enc = $dst$$reg + 2; 2008 int src_con = ((julong)($src$$constant)) >> 32; 2009 if (src_con == 0) { 2010 // xor dst, dst 2011 emit_opcode(cbuf, 0x33); 2012 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 2013 } else { 2014 emit_opcode(cbuf, $primary + dst_enc); 2015 emit_d32(cbuf, src_con); 2016 } 2017 %} 2018 2019 2020 // Encode a reg-reg copy. If it is useless, then empty encoding. 2021 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 2022 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2023 %} 2024 2025 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 2026 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2027 %} 2028 2029 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 2030 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2031 %} 2032 2033 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 2034 $$$emit8$primary; 2035 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2036 %} 2037 2038 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 2039 $$$emit8$secondary; 2040 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2041 %} 2042 2043 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 2044 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2045 %} 2046 2047 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 2048 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2049 %} 2050 2051 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 2052 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); 2053 %} 2054 2055 enc_class Con32 (immI src) %{ // Con32(storeImmI) 2056 // Output immediate 2057 $$$emit32$src$$constant; 2058 %} 2059 2060 enc_class Con32FPR_as_bits(immFPR src) 
%{ // storeF_imm 2061 // Output Float immediate bits 2062 jfloat jf = $src$$constant; 2063 int jf_as_bits = jint_cast( jf ); 2064 emit_d32(cbuf, jf_as_bits); 2065 %} 2066 2067 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 2068 // Output Float immediate bits 2069 jfloat jf = $src$$constant; 2070 int jf_as_bits = jint_cast( jf ); 2071 emit_d32(cbuf, jf_as_bits); 2072 %} 2073 2074 enc_class Con16 (immI src) %{ // Con16(storeImmI) 2075 // Output immediate 2076 $$$emit16$src$$constant; 2077 %} 2078 2079 enc_class Con_d32(immI src) %{ 2080 emit_d32(cbuf,$src$$constant); 2081 %} 2082 2083 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2084 // Output immediate memory reference 2085 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2086 emit_d32(cbuf, 0x00); 2087 %} 2088 2089 enc_class lock_prefix( ) %{ 2090 if( os::is_MP() ) 2091 emit_opcode(cbuf,0xF0); // [Lock] 2092 %} 2093 2094 // Cmp-xchg long value. 2095 // Note: we need to swap rbx, and rcx before and after the 2096 // cmpxchg8 instruction because the instruction uses 2097 // rcx as the high order word of the new value to store but 2098 // our register encoding uses rbx,. 
enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
  // [Lock]
  if( os::is_MP() )
    emit_opcode(cbuf,0xF0);
  // CMPXCHG8 [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xC7);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
%}

// Compare-and-exchange a 32-bit value at [mem_ptr], LOCKed on MP systems.
enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
  // [Lock]
  if( os::is_MP() )
    emit_opcode(cbuf,0xF0);

  // CMPXCHG [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// Compare-and-exchange a byte at [mem_ptr], LOCKed on MP systems.
enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
  // [Lock]
  if( os::is_MP() )
    emit_opcode(cbuf,0xF0);

  // CMPXCHGB [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB0);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// Compare-and-exchange a 16-bit value at [mem_ptr], LOCKed on MP systems.
enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
  // [Lock]
  if( os::is_MP() )
    emit_opcode(cbuf,0xF0);

  // 16-bit mode
  emit_opcode(cbuf, 0x66);

  // CMPXCHGW [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// Materialize ZF==0 as a boolean in 'res' (1 if equal, 0 if not).
// Uses MOV res,imm32 (which does not touch flags) plus a short branch.
enc_class enc_flags_ne_to_boolean( iRegI res ) %{
  int res_encoding = $res$$reg;

  // MOV  res,0
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 0 );
  // JNE,s  fail
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 5 );
  // MOV  res,1
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 1 );
  // fail:
%}

enc_class set_instruction_start( ) %{
  cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
%}

enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = $ereg$$reg;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc();
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp + 4;      // Offset is 4 further in memory
  assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
%}

// Long shift by a constant 1..31: double-precision shift ($tertiary = SHLD
// 0xA4 or SHRD 0xAC) of one half into the other, then shift the other half.
enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
  int r1, r2;
  if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,$tertiary);
  emit_rm(cbuf, 0x3, r1, r2);
  emit_d8(cbuf,$cnt$$constant);
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, r1);
  emit_d8(cbuf,$cnt$$constant);
%}

// Arithmetic long shift right by 32..63: move hi into lo, shift lo by
// (cnt-32), then sign-fill hi by shifting it right 31.
enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
  emit_opcode( cbuf, 0x8B ); // Move
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
  emit_d8(cbuf,31);
%}

// Logical long shift by 32..63: move one half into the other, shift it by
// (cnt-32), and clear the vacated half with XOR.
enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
  int r1, r2;
  if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

  emit_opcode( cbuf, 0x8B ); // Move r1,r2
  emit_rm(cbuf, 0x3, r1, r2);
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_opcode(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_opcode(cbuf,0x33);  // XOR r2,r2
  emit_rm(cbuf, 0x3, r2, r2);
%}

// Clone of RegMem but accepts an extra parameter to access each
// half of a double in memory; it never needs relocation info.
enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
  emit_opcode(cbuf,$opcode$$constant);
  int reg_encoding = $rm_reg$$reg;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp + $disp_for_half$$constant;
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
//
// Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
// and it never needs relocation information.
// Frequently used to move data between FPU's Stack Top and memory.
enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
%}

enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
  int reg_encoding = $dst$$reg;
  int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
  int index        = 0x04;            // 0x04 indicates no index
  int scale        = 0x00;            // 0x00 indicates no scale
  int displace     = $src1$$constant; // 0x00 indicates no displacement
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst < src around move
  emit_opcode(cbuf,0x7C);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst > src around move
  emit_opcode(cbuf,0x7F);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class enc_FPR_store(memory mem, regDPR src) %{
  // If src is FPR1, we can just FST to store it.
  // Else we need to FLD it to FPR1, then FSTP to store/pop it.
  int reg_encoding = 0x2; // Just store
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  if( $src$$reg != FPR1L_enc ) {
    reg_encoding = 0x3;  // Store & pop
    emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
  }
  cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
  emit_opcode(cbuf,$primary);
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class neg_reg(rRegI dst) %{
  // NEG $dst
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
%}

enc_class setLT_reg(eCXRegI dst) %{
  // SETLT $dst
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0x9C);
  emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
%}

// Branch-free conditional add: p += (p < q) ? y : 0, using SBB to turn the
// borrow from SUB into an all-ones/all-zeros mask in tmp.
enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
  int tmpReg = $tmp$$reg;

  // SUB $p,$q
  emit_opcode(cbuf,0x2B);
  emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
  // SBB $tmp,$tmp
  emit_opcode(cbuf,0x1B);
  emit_rm(cbuf, 0x3, tmpReg, tmpReg);
  // AND $tmp,$y
  emit_opcode(cbuf,0x23);
  emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
  // ADD $p,$tmp
  emit_opcode(cbuf,0x03);
  emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
%}

enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.hi,$dst.lo
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // CLR $dst.lo
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
  // SHLD $dst.hi,$dst.lo,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xA5);
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  // SHL $dst.lo,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
%}

enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // CLR $dst.hi
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SHR $dst.hi,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
%}

enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x05);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // SAR $dst.hi,31
  emit_opcode(cbuf, 0xC1);
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
  emit_d8(cbuf, 0x1F );
// small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SAR $dst.hi,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
%}


// ----------------- Encodings for floating point unit -----------------
// May leave result in FPU-TOS or FPU reg depending on opcodes
enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $src$$reg );
%}

// Pop argument in FPR0 with FSTP ST(0)
enc_class PopFPU() %{
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, 0xD8 );
%}

// !!!!!  equivalent to Pop_Reg_F
enc_class Pop_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class Push_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
%}

enc_class strictfp_bias1( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

enc_class strictfp_bias2( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Special case for moving an integer register to a stack slot.
enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
%}

// Special case for moving a register to a stack slot.
enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  // Opcode already emitted
  emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
  emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
  emit_d32(cbuf, $dst$$disp);                  // Displacement
%}

// Push the integer in stackSlot 'src' onto FP-stack
enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
  store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
%}

// Push FPU's TOS float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
  store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
%}

// Same as Pop_Mem_F except for opcode
// Push FPU's TOS double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
  store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
%}

enc_class Pop_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class Push_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
%}

// Push FPU's float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
  int pop = 0x02;                      // FST (no pop) when src is already TOS
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;                        // FSTP: pop the copy we just pushed
  }
  store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
%}

// Push FPU's double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
  int pop = 0x02;                      // FST (no pop) when src is already TOS
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;                        // FSTP: pop the copy we just pushed
  }
  store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
%}

// Push FPU's double to a FPU-stack-slot, and pop FPU-stack
enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
  int pop = 0xD0 - 1; // -1 since we skip FLD
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0xD8;
  }
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
%}


enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
  // load dst in FPR0
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // swap src with FPR1:
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
%}

// Push both XMM doubles onto the x87 stack via a stack temporary
// (src1 first, then src0, so src0 ends up in ST(0)).
enc_class Push_ModD_encoding(regD src0, regD src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_d(Address(rsp, 0));
  __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Push both XMM floats onto the x87 stack via a stack temporary
// (src1 first, then src0, so src0 ends up in ST(0)).
enc_class Push_ModF_encoding(regF src0, regF src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 4);
  __ movflt(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_s(Address(rsp, 0));
  __ movflt(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_s(Address(rsp, 0));
%}

// Pop the x87 double result into an XMM register and release the temp slot.
enc_class Push_ResultD(regD dst) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_d(Address(rsp, 0));
  __ movdbl($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, 8);
%}

// Pop the x87 float result into an XMM register; d8 is the stack adjustment.
enc_class Push_ResultF(regF dst, immI d8) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_s(Address(rsp, 0));
  __ movflt($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, $d8$$constant);
%}

// Push a single XMM double onto the x87 stack via a stack temporary.
enc_class Push_SrcD(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

enc_class push_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
%}

enc_class pop_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ addptr(rsp, 8);
%}

// Copy an XMM double onto the x87 stack using the caller-provided temp slot.
enc_class push_xmm_to_fpr1(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

enc_class Push_Result_Mod_DPR( regDPR src) %{
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
  // // following asm replaced with Pop_Reg_F or Pop_Mem_F
  // // FSTP   FPR$dst$$reg
  // emit_opcode( cbuf, 0xDD );
  // emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class fnstsw_sahf_skip_parity() %{
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jnp  ::skip
  emit_opcode( cbuf, 0x7B );
  emit_opcode( cbuf, 0x05 );
%}

enc_class emitModDPR() %{
  // fprem must be iterative
  // :: loop
  // fprem
  emit_opcode( cbuf, 0xD9 );
  emit_opcode( cbuf, 0xF8 );
  // wait
  emit_opcode( cbuf, 0x9b );
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jp ::loop
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0x8A );
  emit_opcode( cbuf, 0xF4 );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
%}

enc_class fpu_flags() %{
  // fnstsw_ax
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // test ax,0x0400
  emit_opcode( cbuf,
0x66 ); // operand-size prefix for 16-bit immediate 2672 emit_opcode( cbuf, 0xA9 ); 2673 emit_d16 ( cbuf, 0x0400 ); 2674 // // // This sequence works, but stalls for 12-16 cycles on PPro 2675 // // test rax,0x0400 2676 // emit_opcode( cbuf, 0xA9 ); 2677 // emit_d32 ( cbuf, 0x00000400 ); 2678 // 2679 // jz exit (no unordered comparison) 2680 emit_opcode( cbuf, 0x74 ); 2681 emit_d8 ( cbuf, 0x02 ); 2682 // mov ah,1 - treat as LT case (set carry flag) 2683 emit_opcode( cbuf, 0xB4 ); 2684 emit_d8 ( cbuf, 0x01 ); 2685 // sahf 2686 emit_opcode( cbuf, 0x9E); 2687 %} 2688 2689 enc_class cmpF_P6_fixup() %{ 2690 // Fixup the integer flags in case comparison involved a NaN 2691 // 2692 // JNP exit (no unordered comparison, P-flag is set by NaN) 2693 emit_opcode( cbuf, 0x7B ); 2694 emit_d8 ( cbuf, 0x03 ); 2695 // MOV AH,1 - treat as LT case (set carry flag) 2696 emit_opcode( cbuf, 0xB4 ); 2697 emit_d8 ( cbuf, 0x01 ); 2698 // SAHF 2699 emit_opcode( cbuf, 0x9E); 2700 // NOP // target for branch to avoid branch to branch 2701 emit_opcode( cbuf, 0x90); 2702 %} 2703 2704 // fnstsw_ax(); 2705 // sahf(); 2706 // movl(dst, nan_result); 2707 // jcc(Assembler::parity, exit); 2708 // movl(dst, less_result); 2709 // jcc(Assembler::below, exit); 2710 // movl(dst, equal_result); 2711 // jcc(Assembler::equal, exit); 2712 // movl(dst, greater_result); 2713 2714 // less_result = 1; 2715 // greater_result = -1; 2716 // equal_result = 0; 2717 // nan_result = -1; 2718 2719 enc_class CmpF_Result(rRegI dst) %{ 2720 // fnstsw_ax(); 2721 emit_opcode( cbuf, 0xDF); 2722 emit_opcode( cbuf, 0xE0); 2723 // sahf 2724 emit_opcode( cbuf, 0x9E); 2725 // movl(dst, nan_result); 2726 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2727 emit_d32( cbuf, -1 ); 2728 // jcc(Assembler::parity, exit); 2729 emit_opcode( cbuf, 0x7A ); 2730 emit_d8 ( cbuf, 0x13 ); 2731 // movl(dst, less_result); 2732 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2733 emit_d32( cbuf, -1 ); 2734 // jcc(Assembler::below, exit); 2735 emit_opcode( cbuf, 0x72 ); 
2736 emit_d8 ( cbuf, 0x0C ); 2737 // movl(dst, equal_result); 2738 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2739 emit_d32( cbuf, 0 ); 2740 // jcc(Assembler::equal, exit); 2741 emit_opcode( cbuf, 0x74 ); 2742 emit_d8 ( cbuf, 0x05 ); 2743 // movl(dst, greater_result); 2744 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2745 emit_d32( cbuf, 1 ); 2746 %} 2747 2748 2749 // Compare the longs and set flags 2750 // BROKEN! Do Not use as-is 2751 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2752 // CMP $src1.hi,$src2.hi 2753 emit_opcode( cbuf, 0x3B ); 2754 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2755 // JNE,s done 2756 emit_opcode(cbuf,0x75); 2757 emit_d8(cbuf, 2 ); 2758 // CMP $src1.lo,$src2.lo 2759 emit_opcode( cbuf, 0x3B ); 2760 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2761 // done: 2762 %} 2763 2764 enc_class convert_int_long( regL dst, rRegI src ) %{ 2765 // mov $dst.lo,$src 2766 int dst_encoding = $dst$$reg; 2767 int src_encoding = $src$$reg; 2768 encode_Copy( cbuf, dst_encoding , src_encoding ); 2769 // mov $dst.hi,$src 2770 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2771 // sar $dst.hi,31 2772 emit_opcode( cbuf, 0xC1 ); 2773 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2774 emit_d8(cbuf, 0x1F ); 2775 %} 2776 2777 enc_class convert_long_double( eRegL src ) %{ 2778 // push $src.hi 2779 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2780 // push $src.lo 2781 emit_opcode(cbuf, 0x50+$src$$reg ); 2782 // fild 64-bits at [SP] 2783 emit_opcode(cbuf,0xdf); 2784 emit_d8(cbuf, 0x6C); 2785 emit_d8(cbuf, 0x24); 2786 emit_d8(cbuf, 0x00); 2787 // pop stack 2788 emit_opcode(cbuf, 0x83); // add SP, #8 2789 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2790 emit_d8(cbuf, 0x8); 2791 %} 2792 2793 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2794 // IMUL EDX:EAX,$src1 2795 emit_opcode( cbuf, 0xF7 ); 2796 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2797 // SAR 
EDX,$cnt-32 2798 int shift_count = ((int)$cnt$$constant) - 32; 2799 if (shift_count > 0) { 2800 emit_opcode(cbuf, 0xC1); 2801 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2802 emit_d8(cbuf, shift_count); 2803 } 2804 %} 2805 2806 // this version doesn't have add sp, 8 2807 enc_class convert_long_double2( eRegL src ) %{ 2808 // push $src.hi 2809 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2810 // push $src.lo 2811 emit_opcode(cbuf, 0x50+$src$$reg ); 2812 // fild 64-bits at [SP] 2813 emit_opcode(cbuf,0xdf); 2814 emit_d8(cbuf, 0x6C); 2815 emit_d8(cbuf, 0x24); 2816 emit_d8(cbuf, 0x00); 2817 %} 2818 2819 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2820 // Basic idea: long = (long)int * (long)int 2821 // IMUL EDX:EAX, src 2822 emit_opcode( cbuf, 0xF7 ); 2823 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2824 %} 2825 2826 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2827 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2828 // MUL EDX:EAX, src 2829 emit_opcode( cbuf, 0xF7 ); 2830 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2831 %} 2832 2833 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2834 // Basic idea: lo(result) = lo(x_lo * y_lo) 2835 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2836 // MOV $tmp,$src.lo 2837 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2838 // IMUL $tmp,EDX 2839 emit_opcode( cbuf, 0x0F ); 2840 emit_opcode( cbuf, 0xAF ); 2841 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2842 // MOV EDX,$src.hi 2843 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2844 // IMUL EDX,EAX 2845 emit_opcode( cbuf, 0x0F ); 2846 emit_opcode( cbuf, 0xAF ); 2847 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2848 // ADD $tmp,EDX 2849 emit_opcode( cbuf, 0x03 ); 2850 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2851 // MUL EDX:EAX,$src.lo 2852 emit_opcode( cbuf, 0xF7 ); 2853 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2854 // ADD EDX,ESI 2855 emit_opcode( 
cbuf, 0x03 ); 2856 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2857 %} 2858 2859 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2860 // Basic idea: lo(result) = lo(src * y_lo) 2861 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2862 // IMUL $tmp,EDX,$src 2863 emit_opcode( cbuf, 0x6B ); 2864 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2865 emit_d8( cbuf, (int)$src$$constant ); 2866 // MOV EDX,$src 2867 emit_opcode(cbuf, 0xB8 + EDX_enc); 2868 emit_d32( cbuf, (int)$src$$constant ); 2869 // MUL EDX:EAX,EDX 2870 emit_opcode( cbuf, 0xF7 ); 2871 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2872 // ADD EDX,ESI 2873 emit_opcode( cbuf, 0x03 ); 2874 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2875 %} 2876 2877 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2878 // PUSH src1.hi 2879 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2880 // PUSH src1.lo 2881 emit_opcode(cbuf, 0x50+$src1$$reg ); 2882 // PUSH src2.hi 2883 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2884 // PUSH src2.lo 2885 emit_opcode(cbuf, 0x50+$src2$$reg ); 2886 // CALL directly to the runtime 2887 cbuf.set_insts_mark(); 2888 emit_opcode(cbuf,0xE8); // Call into runtime 2889 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2890 // Restore stack 2891 emit_opcode(cbuf, 0x83); // add SP, #framesize 2892 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2893 emit_d8(cbuf, 4*4); 2894 %} 2895 2896 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2897 // PUSH src1.hi 2898 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2899 // PUSH src1.lo 2900 emit_opcode(cbuf, 0x50+$src1$$reg ); 2901 // PUSH src2.hi 2902 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2903 // PUSH src2.lo 2904 emit_opcode(cbuf, 0x50+$src2$$reg ); 2905 // CALL directly to the runtime 2906 cbuf.set_insts_mark(); 2907 emit_opcode(cbuf,0xE8); // Call into runtime 2908 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2909 // Restore stack 2910 emit_opcode(cbuf, 0x83); // add SP, #framesize 2911 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2912 emit_d8(cbuf, 4*4); 2913 %} 2914 2915 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2916 // MOV $tmp,$src.lo 2917 emit_opcode(cbuf, 0x8B); 2918 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2919 // OR $tmp,$src.hi 2920 emit_opcode(cbuf, 0x0B); 2921 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2922 %} 2923 2924 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2925 // CMP $src1.lo,$src2.lo 2926 emit_opcode( cbuf, 0x3B ); 2927 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2928 // JNE,s skip 2929 emit_cc(cbuf, 0x70, 0x5); 2930 emit_d8(cbuf,2); 2931 // CMP $src1.hi,$src2.hi 2932 emit_opcode( cbuf, 0x3B ); 2933 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2934 %} 2935 2936 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2937 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2938 emit_opcode( cbuf, 0x3B ); 2939 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2940 // MOV $tmp,$src1.hi 2941 emit_opcode( cbuf, 0x8B ); 2942 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2943 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2944 emit_opcode( cbuf, 0x1B ); 2945 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2946 %} 2947 2948 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2949 // XOR $tmp,$tmp 2950 emit_opcode(cbuf,0x33); // XOR 2951 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2952 // CMP $tmp,$src.lo 2953 emit_opcode( cbuf, 0x3B ); 2954 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2955 // SBB $tmp,$src.hi 2956 emit_opcode( cbuf, 0x1B ); 2957 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2958 %} 2959 2960 // Sniff, sniff... 
// smells like Gnu Superoptimizer
// Two's-complement negate of a long register pair: NEG hi; NEG lo; SBB hi,0.
enc_class neg_long( eRegL dst ) %{
  emit_opcode(cbuf,0xF7);    // NEG hi
  emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
  emit_opcode(cbuf,0xF7);    // NEG lo
  emit_rm    (cbuf,0x3, 0x3,  $dst$$reg );
  emit_opcode(cbuf,0x83);    // SBB hi,0
  emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
  emit_d8    (cbuf,0 );
%}

// POP EDX (opcode 5A).
enc_class enc_pop_rdx() %{
  emit_opcode(cbuf,0x5A);
%}

// Jump to the shared rethrow stub.
enc_class enc_rethrow() %{
  cbuf.set_insts_mark();
  emit_opcode(cbuf, 0xE9);        // jmp    entry
  emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                 runtime_call_Relocation::spec(), RELOC_IMM32 );
%}


// Convert a double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware throws an exception which
// patches up the correct value directly to the stack.
enc_class DPR2I_encoding( regDPR src ) %{
  // Flip to round-to-zero mode.  We attempted to allow invalid-op
  // exceptions here, so that a NAN or other corner-case value will
  // throw an exception (but normal values get converted at full speed).
  // However, I2C adapters and other float-stack manglers leave pending
  // invalid-op exceptions hanging.  We would have to clear them before
  // enabling them and that is more expensive than just testing for the
  // invalid value Intel stores down in the corner cases.
  emit_opcode(cbuf,0xD9);         // FLDCW  trunc
  emit_opcode(cbuf,0x2D);
  emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
  // Allocate a word
  emit_opcode(cbuf,0x83);         // SUB ESP,4
  emit_opcode(cbuf,0xEC);
  emit_d8(cbuf,0x04);
  // Encoding assumes a double has been pushed into FPR0.
  // Store down the double as an int, popping the FPU stack
  emit_opcode(cbuf,0xDB);         // FISTP [ESP]
  emit_opcode(cbuf,0x1C);
  emit_d8(cbuf,0x24);
  // Restore the rounding mode; mask the exception
  emit_opcode(cbuf,0xD9);         // FLDCW  std/24-bit mode
  emit_opcode(cbuf,0x2D);
  emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

  // Load the converted int; adjust CPU stack
  emit_opcode(cbuf,0x58);         // POP EAX
  emit_opcode(cbuf,0x3D);         // CMP EAX,imm
  emit_d32   (cbuf,0x80000000);   // 0x80000000 == hardware's "invalid" marker
  emit_opcode(cbuf,0x75);         // JNE around_slow_call
  emit_d8    (cbuf,0x07);         // Size of slow_call (byte count of the FLD+CALL below)
  // Push src onto stack slow-path
  emit_opcode(cbuf,0xD9 );        // FLD     ST(i)
  emit_d8    (cbuf,0xC0-1+$src$$reg );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Carry on here...
%}

// Convert a double to a long with Java round-toward-zero semantics;
// same structure as DPR2I_encoding but storing a 64-bit FISTP and
// checking both result halves for the hardware "invalid" pattern.
enc_class DPR2L_encoding( regDPR src ) %{
  emit_opcode(cbuf,0xD9);         // FLDCW  trunc
  emit_opcode(cbuf,0x2D);
  emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
  // Allocate a word
  emit_opcode(cbuf,0x83);         // SUB ESP,8
  emit_opcode(cbuf,0xEC);
  emit_d8(cbuf,0x08);
  // Encoding assumes a double has been pushed into FPR0.
  // Store down the double as a long, popping the FPU stack
  emit_opcode(cbuf,0xDF);         // FISTP [ESP]
  emit_opcode(cbuf,0x3C);
  emit_d8(cbuf,0x24);
  // Restore the rounding mode; mask the exception
  emit_opcode(cbuf,0xD9);         // FLDCW  std/24-bit mode
  emit_opcode(cbuf,0x2D);
  emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

  // Load the converted int; adjust CPU stack
  emit_opcode(cbuf,0x58);         // POP EAX
  emit_opcode(cbuf,0x5A);         // POP EDX
  emit_opcode(cbuf,0x81);         // CMP EDX,imm
  emit_d8    (cbuf,0xFA);         // rdx
  emit_d32   (cbuf,0x80000000);   // 0x80000000
  emit_opcode(cbuf,0x75);         // JNE around_slow_call
  emit_d8    (cbuf,0x07+4);       // Size of slow_call + the TEST/JNE below
  emit_opcode(cbuf,0x85);         // TEST EAX,EAX
  emit_opcode(cbuf,0xC0);         // 2/rax,/rax,
  emit_opcode(cbuf,0x75);         // JNE around_slow_call
  emit_d8    (cbuf,0x07);         // Size of slow_call
  // Push src onto stack slow-path
  emit_opcode(cbuf,0xD9 );        // FLD     ST(i)
  emit_d8    (cbuf,0xC0-1+$src$$reg );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Carry on here...
%}

enc_class FMul_ST_reg( eRegFPR src1 ) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FMUL   ST,$src  /* D8 C8+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC8 + $src1$$reg);
%}

enc_class FAdd_ST_reg( eRegFPR src2 ) %{
  // FADD   ST,src2  /* D8 C0+i */   (D8 C0+i encodes FADD, not FADDP)
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src2$$reg);
  //could use FADDP  src2,fpST  /* DE C0+i */
%}

enc_class FAddP_reg_ST( eRegFPR src2 ) %{
  // FADDP  src2,ST  /* DE C0+i */
  emit_opcode(cbuf, 0xDE);
  emit_opcode(cbuf, 0xC0 + $src2$$reg);
%}

enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
  // Operand has been loaded into fp ST (stack top)
  // FSUB   ST,$src1
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xE0 + $src1$$reg);

  // FDIV
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xF0 + $src2$$reg);
%}

enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FADD   ST,$src  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src1$$reg);

  // FMUL  ST,src2  /* D8 C*+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC8 + $src2$$reg);
%}


enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FADD   ST,$src  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src1$$reg);

  // FMULP  src2,ST  /* DE C8+i */
  emit_opcode(cbuf, 0xDE);
  emit_opcode(cbuf, 0xC8 + $src2$$reg);
%}

// Atomically load the volatile long
// (FILD from memory, then store the 64-bit value to the stack slot.)
enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
  emit_opcode(cbuf,0xDF);
  int rm_byte_opcode = 0x05;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
  store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
  cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
  emit_opcode(cbuf,0xDF);
  int rm_byte_opcode = 0x07;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

// Safepoint Poll.  This polls the safepoint page, and causes an
// exception if it is not readable. Unfortunately, it kills the condition code
// in the process
// We current use TESTL [spp],EDI
// A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0

enc_class Safepoint_Poll() %{
  cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
  emit_opcode(cbuf,0x85);
  emit_rm (cbuf, 0x0, 0x7, 0x5);
  emit_d32(cbuf, (intptr_t)os::get_polling_page());
%}
%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   | (to get allocators register number
//  G  Owned by |        |           v    add OptoReg::stack0())
//  r   CALLER  |        |
//  o     |     +--------+      pad to even-align allocators stack-slot
//  w     V     |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^     |   in   |  5
//        |     |  args  |  4   Holes in incoming args owned by SELF
//        |     |        |  3
//        |     +--------+
//        V     | old out|      Empty on Intel, window on Sparc
//        | old |preserve|      Must be even aligned.
//        | SP-+--------+----> Matcher::_old_SP, even aligned
//        |     |   in   |  3   area for Intel ret address
//     Owned by |preserve|      Empty on Sparc.
//       SELF   +--------+
//        |     |  pad2  |  2   pad to align old SP
//        |     +--------+  1
//        |     | locks  |  0
//        |     +--------+----> OptoReg::stack0(), even aligned
//        |     |  pad1  | 11   pad to align new SP
//        |     +--------+
//        |     |        | 10
//        |     | spills |  9   spills
//        V     |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^     |  out   |  7
//        |     |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by +--------+
//      CALLEE  | new out|  6   Empty on Intel, window on Sparc
//        | new |preserve|      Must be even-aligned.
//        | SP-+--------+----> Matcher::_new_SP, even aligned
//        |     |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate that fits in a signed byte
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant that fits in a signed 16-bit immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in the low word of a long shift (1..31)
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count that spills into the high word of a long shift (32..63)
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate -1 (all bits set)
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate whose value fits in a signed 32-bit immediate
// (i.e. sign-extending the low 32 bits reproduces the long).
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 form, only when SSE2 doubles are not in use)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 form)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 form, only when SSE is off)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE form)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
// (bit-pattern test, so -0.0 does not match).
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
// The match() list forms chain rules from the specialized register
// operands back to the generic one; their order is significant to ADLC.
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register (registers with byte-addressable subregisters)
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register excluding EAX (see nax_reg register class)
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding EAX and EDX (see nadx_reg register class)
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding ECX (see ncx_reg register class)
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(esi_reg));
   match(reg);
   match(rRegI);

   format %{ "ESI" %}
   interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX (see nax_reg register class)
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX and EBX (see nabx_reg register class)
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register from the p_reg register class
operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long register (a pair of 32-bit integer registers)
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// predicate(false) keeps the matcher from selecting this operand directly.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands (x87 double, only when SSE2 is not used)
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

// Any x87 double register except FPR1
operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands (x87 float)
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
// base 0xFFFFFFFF / index 0x4 are the ADLC encodings for "no base" /
// "no index" in the MEMORY_INTER interface.
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// (pointer constant as the displacement, integer register as the base)
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.
// load-long support
// Pinned to ESI so the address cannot overlap the load-long destination pair.
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// These mirror the plain memory operands above but exclude EBP as a base
// (via eRegP_no_EBP) and carry op_cost(100) to discourage their use.

// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}
//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
// Signed compare; each entry pairs the condition-code value with the
// jcc/setcc mnemonic suffix used when emitting the instruction.
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
// (restricted to eq/ne, which need the unordered-case fixup)
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
// NOTE(review): these condition values are full opcode bytes for the fcmov
// encoding, unlike the 4-bit codes above — confirm against the encodings
// that consume this operand.
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
// Each condition is swapped relative to cmpOp (less -> "g", etc.) for use
// where the comparison operands have been commuted.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares
// Unsigned counterpart of cmpOp_commute: conditions swapped relative to cmpOpU.
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
4652 pipeline %{ 4653 4654 //----------ATTRIBUTES--------------------------------------------------------- 4655 attributes %{ 4656 variable_size_instructions; // Fixed size instructions 4657 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle 4658 instruction_unit_size = 1; // An instruction is 1 bytes long 4659 instruction_fetch_unit_size = 16; // The processor fetches one line 4660 instruction_fetch_units = 1; // of 16 bytes 4661 4662 // List of nop instructions 4663 nops( MachNop ); 4664 %} 4665 4666 //----------RESOURCES---------------------------------------------------------- 4667 // Resources are the functional units available to the machine 4668 4669 // Generic P2/P3 pipeline 4670 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of 4671 // 3 instructions decoded per cycle. 4672 // 2 load/store ops per cycle, 1 branch, 1 FPU, 4673 // 2 ALU op, only ALU0 handles mul/div instructions. 4674 resources( D0, D1, D2, DECODE = D0 | D1 | D2, 4675 MS0, MS1, MEM = MS0 | MS1, 4676 BR, FPU, 4677 ALU0, ALU1, ALU = ALU0 | ALU1 ); 4678 4679 //----------PIPELINE DESCRIPTION----------------------------------------------- 4680 // Pipeline Description specifies the stages in the machine's pipeline 4681 4682 // Generic P2/P3 pipeline 4683 pipe_desc(S0, S1, S2, S3, S4, S5); 4684 4685 //----------PIPELINE CLASSES--------------------------------------------------- 4686 // Pipeline Classes describe the stages in which input and output are 4687 // referenced by the hardware pipeline. 4688 4689 // Naming convention: ialu or fpu 4690 // Then: _reg 4691 // Then: _reg if there is a 2nd register 4692 // Then: _long if it's a pair of instructions implementing a long 4693 // Then: _fat if it requires the big decoder 4694 // Or: _mem if it requires the big decoder and a memory unit. 
4695 4696 // Integer ALU reg operation 4697 pipe_class ialu_reg(rRegI dst) %{ 4698 single_instruction; 4699 dst : S4(write); 4700 dst : S3(read); 4701 DECODE : S0; // any decoder 4702 ALU : S3; // any alu 4703 %} 4704 4705 // Long ALU reg operation 4706 pipe_class ialu_reg_long(eRegL dst) %{ 4707 instruction_count(2); 4708 dst : S4(write); 4709 dst : S3(read); 4710 DECODE : S0(2); // any 2 decoders 4711 ALU : S3(2); // both alus 4712 %} 4713 4714 // Integer ALU reg operation using big decoder 4715 pipe_class ialu_reg_fat(rRegI dst) %{ 4716 single_instruction; 4717 dst : S4(write); 4718 dst : S3(read); 4719 D0 : S0; // big decoder only 4720 ALU : S3; // any alu 4721 %} 4722 4723 // Long ALU reg operation using big decoder 4724 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4725 instruction_count(2); 4726 dst : S4(write); 4727 dst : S3(read); 4728 D0 : S0(2); // big decoder only; twice 4729 ALU : S3(2); // any 2 alus 4730 %} 4731 4732 // Integer ALU reg-reg operation 4733 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4734 single_instruction; 4735 dst : S4(write); 4736 src : S3(read); 4737 DECODE : S0; // any decoder 4738 ALU : S3; // any alu 4739 %} 4740 4741 // Long ALU reg-reg operation 4742 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4743 instruction_count(2); 4744 dst : S4(write); 4745 src : S3(read); 4746 DECODE : S0(2); // any 2 decoders 4747 ALU : S3(2); // both alus 4748 %} 4749 4750 // Integer ALU reg-reg operation 4751 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4752 single_instruction; 4753 dst : S4(write); 4754 src : S3(read); 4755 D0 : S0; // big decoder only 4756 ALU : S3; // any alu 4757 %} 4758 4759 // Long ALU reg-reg operation 4760 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4761 instruction_count(2); 4762 dst : S4(write); 4763 src : S3(read); 4764 D0 : S0(2); // big decoder only; twice 4765 ALU : S3(2); // both alus 4766 %} 4767 4768 // Integer ALU reg-mem operation 4769 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4770 single_instruction; 4771 dst : S5(write); 4772 mem : S3(read); 4773 D0 : S0; // big decoder only 4774 ALU : S4; // any alu 4775 MEM : S3; // any mem 4776 %} 4777 4778 // Long ALU reg-mem operation 4779 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4780 instruction_count(2); 4781 dst : S5(write); 4782 mem : S3(read); 4783 D0 : S0(2); // big decoder only; twice 4784 ALU : S4(2); // any 2 alus 4785 MEM : S3(2); // both mems 4786 %} 4787 4788 // Integer mem operation (prefetch) 4789 pipe_class ialu_mem(memory mem) 4790 %{ 4791 single_instruction; 4792 mem : S3(read); 4793 D0 : S0; // big decoder only 4794 MEM : S3; // any mem 4795 %} 4796 4797 // Integer Store to Memory 4798 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4799 single_instruction; 4800 mem : S3(read); 4801 src : S5(read); 4802 D0 : S0; // big decoder only 4803 ALU : S4; // any alu 4804 MEM : S3; 4805 %} 4806 4807 // Long Store to Memory 4808 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4809 instruction_count(2); 4810 mem : S3(read); 4811 src : S5(read); 4812 D0 : S0(2); // big decoder only; twice 4813 ALU : S4(2); // any 2 alus 4814 MEM : S3(2); // Both mems 4815 %} 4816 4817 // Integer Store to Memory 4818 pipe_class ialu_mem_imm(memory mem) %{ 4819 single_instruction; 4820 mem : S3(read); 4821 D0 : S0; // big decoder only 4822 ALU : S4; // any alu 4823 MEM : S3; 4824 %} 4825 4826 // Integer ALU0 reg-reg operation 4827 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4828 single_instruction; 4829 dst : S4(write); 4830 src : S3(read); 4831 D0 : S0; // Big decoder only 4832 ALU0 : S3; // only alu0 4833 %} 4834 4835 // Integer ALU0 reg-mem operation 4836 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4837 single_instruction; 4838 dst : S5(write); 4839 mem : S3(read); 4840 D0 : S0; // big decoder only 4841 ALU0 : S4; // ALU0 only 4842 MEM : S3; // any mem 4843 %} 4844 4845 // Integer ALU reg-reg operation 4846 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4847 single_instruction; 4848 cr : S4(write); 4849 src1 : S3(read); 4850 src2 : S3(read); 4851 DECODE : S0; // any decoder 4852 ALU : S3; // any alu 4853 %} 4854 4855 // Integer ALU reg-imm operation 4856 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4857 single_instruction; 4858 cr : S4(write); 4859 src1 : S3(read); 4860 DECODE : S0; // any decoder 4861 ALU : S3; // any alu 4862 %} 4863 4864 // Integer ALU reg-mem operation 4865 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4866 single_instruction; 4867 cr : S4(write); 4868 src1 : S3(read); 4869 src2 : S3(read); 4870 D0 : S0; // big decoder only 4871 ALU : S4; // any alu 4872 MEM : S3; 4873 %} 4874 4875 // Conditional move reg-reg 4876 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4877 instruction_count(4); 4878 y : S4(read); 4879 q : S3(read); 4880 p : S3(read); 4881 DECODE : S0(4); // any decoder 4882 %} 4883 4884 // Conditional move reg-reg 4885 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4886 single_instruction; 4887 dst : S4(write); 4888 src : S3(read); 4889 cr : S3(read); 4890 DECODE : S0; // any decoder 4891 %} 4892 4893 // Conditional move reg-mem 4894 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4895 single_instruction; 4896 dst : S4(write); 4897 src : S3(read); 4898 cr : S3(read); 4899 DECODE : S0; // any decoder 4900 MEM : S3; 4901 %} 4902 4903 // Conditional move reg-reg long 4904 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4905 single_instruction; 4906 dst : S4(write); 4907 src : S3(read); 4908 cr : S3(read); 4909 DECODE : S0(2); // any 2 decoders 4910 %} 4911 4912 // Conditional move double reg-reg 4913 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4914 single_instruction; 4915 dst : S4(write); 4916 src : S3(read); 4917 cr : S3(read); 4918 DECODE : S0; // any decoder 4919 %} 4920 4921 // Float reg-reg operation 4922 pipe_class fpu_reg(regDPR 
dst) %{ 4923 instruction_count(2); 4924 dst : S3(read); 4925 DECODE : S0(2); // any 2 decoders 4926 FPU : S3; 4927 %} 4928 4929 // Float reg-reg operation 4930 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4931 instruction_count(2); 4932 dst : S4(write); 4933 src : S3(read); 4934 DECODE : S0(2); // any 2 decoders 4935 FPU : S3; 4936 %} 4937 4938 // Float reg-reg operation 4939 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4940 instruction_count(3); 4941 dst : S4(write); 4942 src1 : S3(read); 4943 src2 : S3(read); 4944 DECODE : S0(3); // any 3 decoders 4945 FPU : S3(2); 4946 %} 4947 4948 // Float reg-reg operation 4949 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 4950 instruction_count(4); 4951 dst : S4(write); 4952 src1 : S3(read); 4953 src2 : S3(read); 4954 src3 : S3(read); 4955 DECODE : S0(4); // any 3 decoders 4956 FPU : S3(2); 4957 %} 4958 4959 // Float reg-reg operation 4960 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 4961 instruction_count(4); 4962 dst : S4(write); 4963 src1 : S3(read); 4964 src2 : S3(read); 4965 src3 : S3(read); 4966 DECODE : S1(3); // any 3 decoders 4967 D0 : S0; // Big decoder only 4968 FPU : S3(2); 4969 MEM : S3; 4970 %} 4971 4972 // Float reg-mem operation 4973 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 4974 instruction_count(2); 4975 dst : S5(write); 4976 mem : S3(read); 4977 D0 : S0; // big decoder only 4978 DECODE : S1; // any decoder for FPU POP 4979 FPU : S4; 4980 MEM : S3; // any mem 4981 %} 4982 4983 // Float reg-mem operation 4984 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 4985 instruction_count(3); 4986 dst : S5(write); 4987 src1 : S3(read); 4988 mem : S3(read); 4989 D0 : S0; // big decoder only 4990 DECODE : S1(2); // any decoder for FPU POP 4991 FPU : S4; 4992 MEM : S3; // any mem 4993 %} 4994 4995 // Float mem-reg operation 4996 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 4997 
instruction_count(2); 4998 src : S5(read); 4999 mem : S3(read); 5000 DECODE : S0; // any decoder for FPU PUSH 5001 D0 : S1; // big decoder only 5002 FPU : S4; 5003 MEM : S3; // any mem 5004 %} 5005 5006 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 5007 instruction_count(3); 5008 src1 : S3(read); 5009 src2 : S3(read); 5010 mem : S3(read); 5011 DECODE : S0(2); // any decoder for FPU PUSH 5012 D0 : S1; // big decoder only 5013 FPU : S4; 5014 MEM : S3; // any mem 5015 %} 5016 5017 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 5018 instruction_count(3); 5019 src1 : S3(read); 5020 src2 : S3(read); 5021 mem : S4(read); 5022 DECODE : S0; // any decoder for FPU PUSH 5023 D0 : S0(2); // big decoder only 5024 FPU : S4; 5025 MEM : S3(2); // any mem 5026 %} 5027 5028 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 5029 instruction_count(2); 5030 src1 : S3(read); 5031 dst : S4(read); 5032 D0 : S0(2); // big decoder only 5033 MEM : S3(2); // any mem 5034 %} 5035 5036 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 5037 instruction_count(3); 5038 src1 : S3(read); 5039 src2 : S3(read); 5040 dst : S4(read); 5041 D0 : S0(3); // big decoder only 5042 FPU : S4; 5043 MEM : S3(3); // any mem 5044 %} 5045 5046 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 5047 instruction_count(3); 5048 src1 : S4(read); 5049 mem : S4(read); 5050 DECODE : S0; // any decoder for FPU PUSH 5051 D0 : S0(2); // big decoder only 5052 FPU : S4; 5053 MEM : S3(2); // any mem 5054 %} 5055 5056 // Float load constant 5057 pipe_class fpu_reg_con(regDPR dst) %{ 5058 instruction_count(2); 5059 dst : S5(write); 5060 D0 : S0; // big decoder only for the load 5061 DECODE : S1; // any decoder for FPU POP 5062 FPU : S4; 5063 MEM : S3; // any mem 5064 %} 5065 5066 // Float load constant 5067 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5068 instruction_count(3); 5069 dst : S5(write); 5070 src : S3(read); 5071 D0 : S0; // big decoder only for 
the load 5072 DECODE : S1(2); // any decoder for FPU POP 5073 FPU : S4; 5074 MEM : S3; // any mem 5075 %} 5076 5077 // UnConditional branch 5078 pipe_class pipe_jmp( label labl ) %{ 5079 single_instruction; 5080 BR : S3; 5081 %} 5082 5083 // Conditional branch 5084 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5085 single_instruction; 5086 cr : S1(read); 5087 BR : S3; 5088 %} 5089 5090 // Allocation idiom 5091 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5092 instruction_count(1); force_serialization; 5093 fixed_latency(6); 5094 heap_ptr : S3(read); 5095 DECODE : S0(3); 5096 D0 : S2; 5097 MEM : S3; 5098 ALU : S3(2); 5099 dst : S5(write); 5100 BR : S5; 5101 %} 5102 5103 // Generic big/slow expanded idiom 5104 pipe_class pipe_slow( ) %{ 5105 instruction_count(10); multiple_bundles; force_serialization; 5106 fixed_latency(100); 5107 D0 : S0(2); 5108 MEM : S3(2); 5109 %} 5110 5111 // The real do-nothing guy 5112 pipe_class empty( ) %{ 5113 instruction_count(0); 5114 %} 5115 5116 // Define the class for the Nop node 5117 define %{ 5118 MachNop = empty; 5119 %} 5120 5121 %} 5122 5123 //----------INSTRUCTIONS------------------------------------------------------- 5124 // 5125 // match -- States which machine-independent subtree may be replaced 5126 // by this instruction. 5127 // ins_cost -- The estimated cost of this instruction is used by instruction 5128 // selection to identify a minimum cost tree of machine 5129 // instructions that matches a tree of machine-independent 5130 // instructions. 5131 // format -- A string providing the disassembly for this instruction. 5132 // The value of an instruction's operand may be inserted 5133 // by referring to it with a '$' prefix. 5134 // opcode -- Three instruction opcodes may be provided. These are referred 5135 // to within an encode class as $primary, $secondary, and $tertiary 5136 // respectively. 
The primary opcode is commonly used to 5137 // indicate the type of machine instruction, while secondary 5138 // and tertiary are often used for prefix options or addressing 5139 // modes. 5140 // ins_encode -- A list of encode classes with parameters. The encode class 5141 // name must have been defined in an 'enc_class' specification 5142 // in the encode section of the architecture description. 5143 5144 //----------BSWAP-Instruction-------------------------------------------------- 5145 instruct bytes_reverse_int(rRegI dst) %{ 5146 match(Set dst (ReverseBytesI dst)); 5147 5148 format %{ "BSWAP $dst" %} 5149 opcode(0x0F, 0xC8); 5150 ins_encode( OpcP, OpcSReg(dst) ); 5151 ins_pipe( ialu_reg ); 5152 %} 5153 5154 instruct bytes_reverse_long(eRegL dst) %{ 5155 match(Set dst (ReverseBytesL dst)); 5156 5157 format %{ "BSWAP $dst.lo\n\t" 5158 "BSWAP $dst.hi\n\t" 5159 "XCHG $dst.lo $dst.hi" %} 5160 5161 ins_cost(125); 5162 ins_encode( bswap_long_bytes(dst) ); 5163 ins_pipe( ialu_reg_reg); 5164 %} 5165 5166 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5167 match(Set dst (ReverseBytesUS dst)); 5168 effect(KILL cr); 5169 5170 format %{ "BSWAP $dst\n\t" 5171 "SHR $dst,16\n\t" %} 5172 ins_encode %{ 5173 __ bswapl($dst$$Register); 5174 __ shrl($dst$$Register, 16); 5175 %} 5176 ins_pipe( ialu_reg ); 5177 %} 5178 5179 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5180 match(Set dst (ReverseBytesS dst)); 5181 effect(KILL cr); 5182 5183 format %{ "BSWAP $dst\n\t" 5184 "SAR $dst,16\n\t" %} 5185 ins_encode %{ 5186 __ bswapl($dst$$Register); 5187 __ sarl($dst$$Register, 16); 5188 %} 5189 ins_pipe( ialu_reg ); 5190 %} 5191 5192 5193 //---------- Zeros Count Instructions ------------------------------------------ 5194 5195 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5196 predicate(UseCountLeadingZerosInstruction); 5197 match(Set dst (CountLeadingZerosI src)); 5198 effect(KILL cr); 5199 5200 format %{ "LZCNT $dst, $src\t# count 
leading zeros (int)" %} 5201 ins_encode %{ 5202 __ lzcntl($dst$$Register, $src$$Register); 5203 %} 5204 ins_pipe(ialu_reg); 5205 %} 5206 5207 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5208 predicate(!UseCountLeadingZerosInstruction); 5209 match(Set dst (CountLeadingZerosI src)); 5210 effect(KILL cr); 5211 5212 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5213 "JNZ skip\n\t" 5214 "MOV $dst, -1\n" 5215 "skip:\n\t" 5216 "NEG $dst\n\t" 5217 "ADD $dst, 31" %} 5218 ins_encode %{ 5219 Register Rdst = $dst$$Register; 5220 Register Rsrc = $src$$Register; 5221 Label skip; 5222 __ bsrl(Rdst, Rsrc); 5223 __ jccb(Assembler::notZero, skip); 5224 __ movl(Rdst, -1); 5225 __ bind(skip); 5226 __ negl(Rdst); 5227 __ addl(Rdst, BitsPerInt - 1); 5228 %} 5229 ins_pipe(ialu_reg); 5230 %} 5231 5232 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5233 predicate(UseCountLeadingZerosInstruction); 5234 match(Set dst (CountLeadingZerosL src)); 5235 effect(TEMP dst, KILL cr); 5236 5237 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5238 "JNC done\n\t" 5239 "LZCNT $dst, $src.lo\n\t" 5240 "ADD $dst, 32\n" 5241 "done:" %} 5242 ins_encode %{ 5243 Register Rdst = $dst$$Register; 5244 Register Rsrc = $src$$Register; 5245 Label done; 5246 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5247 __ jccb(Assembler::carryClear, done); 5248 __ lzcntl(Rdst, Rsrc); 5249 __ addl(Rdst, BitsPerInt); 5250 __ bind(done); 5251 %} 5252 ins_pipe(ialu_reg); 5253 %} 5254 5255 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5256 predicate(!UseCountLeadingZerosInstruction); 5257 match(Set dst (CountLeadingZerosL src)); 5258 effect(TEMP dst, KILL cr); 5259 5260 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5261 "JZ msw_is_zero\n\t" 5262 "ADD $dst, 32\n\t" 5263 "JMP not_zero\n" 5264 "msw_is_zero:\n\t" 5265 "BSR $dst, $src.lo\n\t" 5266 "JNZ not_zero\n\t" 5267 "MOV $dst, -1\n" 5268 "not_zero:\n\t" 5269 "NEG 
$dst\n\t" 5270 "ADD $dst, 63\n" %} 5271 ins_encode %{ 5272 Register Rdst = $dst$$Register; 5273 Register Rsrc = $src$$Register; 5274 Label msw_is_zero; 5275 Label not_zero; 5276 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5277 __ jccb(Assembler::zero, msw_is_zero); 5278 __ addl(Rdst, BitsPerInt); 5279 __ jmpb(not_zero); 5280 __ bind(msw_is_zero); 5281 __ bsrl(Rdst, Rsrc); 5282 __ jccb(Assembler::notZero, not_zero); 5283 __ movl(Rdst, -1); 5284 __ bind(not_zero); 5285 __ negl(Rdst); 5286 __ addl(Rdst, BitsPerLong - 1); 5287 %} 5288 ins_pipe(ialu_reg); 5289 %} 5290 5291 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5292 predicate(UseCountTrailingZerosInstruction); 5293 match(Set dst (CountTrailingZerosI src)); 5294 effect(KILL cr); 5295 5296 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5297 ins_encode %{ 5298 __ tzcntl($dst$$Register, $src$$Register); 5299 %} 5300 ins_pipe(ialu_reg); 5301 %} 5302 5303 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5304 predicate(!UseCountTrailingZerosInstruction); 5305 match(Set dst (CountTrailingZerosI src)); 5306 effect(KILL cr); 5307 5308 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5309 "JNZ done\n\t" 5310 "MOV $dst, 32\n" 5311 "done:" %} 5312 ins_encode %{ 5313 Register Rdst = $dst$$Register; 5314 Label done; 5315 __ bsfl(Rdst, $src$$Register); 5316 __ jccb(Assembler::notZero, done); 5317 __ movl(Rdst, BitsPerInt); 5318 __ bind(done); 5319 %} 5320 ins_pipe(ialu_reg); 5321 %} 5322 5323 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5324 predicate(UseCountTrailingZerosInstruction); 5325 match(Set dst (CountTrailingZerosL src)); 5326 effect(TEMP dst, KILL cr); 5327 5328 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5329 "JNC done\n\t" 5330 "TZCNT $dst, $src.hi\n\t" 5331 "ADD $dst, 32\n" 5332 "done:" %} 5333 ins_encode %{ 5334 Register Rdst = $dst$$Register; 5335 Register Rsrc = $src$$Register; 5336 Label done; 5337 __ 
tzcntl(Rdst, Rsrc); 5338 __ jccb(Assembler::carryClear, done); 5339 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5340 __ addl(Rdst, BitsPerInt); 5341 __ bind(done); 5342 %} 5343 ins_pipe(ialu_reg); 5344 %} 5345 5346 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5347 predicate(!UseCountTrailingZerosInstruction); 5348 match(Set dst (CountTrailingZerosL src)); 5349 effect(TEMP dst, KILL cr); 5350 5351 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5352 "JNZ done\n\t" 5353 "BSF $dst, $src.hi\n\t" 5354 "JNZ msw_not_zero\n\t" 5355 "MOV $dst, 32\n" 5356 "msw_not_zero:\n\t" 5357 "ADD $dst, 32\n" 5358 "done:" %} 5359 ins_encode %{ 5360 Register Rdst = $dst$$Register; 5361 Register Rsrc = $src$$Register; 5362 Label msw_not_zero; 5363 Label done; 5364 __ bsfl(Rdst, Rsrc); 5365 __ jccb(Assembler::notZero, done); 5366 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5367 __ jccb(Assembler::notZero, msw_not_zero); 5368 __ movl(Rdst, BitsPerInt); 5369 __ bind(msw_not_zero); 5370 __ addl(Rdst, BitsPerInt); 5371 __ bind(done); 5372 %} 5373 ins_pipe(ialu_reg); 5374 %} 5375 5376 5377 //---------- Population Count Instructions ------------------------------------- 5378 5379 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5380 predicate(UsePopCountInstruction); 5381 match(Set dst (PopCountI src)); 5382 effect(KILL cr); 5383 5384 format %{ "POPCNT $dst, $src" %} 5385 ins_encode %{ 5386 __ popcntl($dst$$Register, $src$$Register); 5387 %} 5388 ins_pipe(ialu_reg); 5389 %} 5390 5391 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5392 predicate(UsePopCountInstruction); 5393 match(Set dst (PopCountI (LoadI mem))); 5394 effect(KILL cr); 5395 5396 format %{ "POPCNT $dst, $mem" %} 5397 ins_encode %{ 5398 __ popcntl($dst$$Register, $mem$$Address); 5399 %} 5400 ins_pipe(ialu_reg); 5401 %} 5402 5403 // Note: Long.bitCount(long) returns an int. 
5404 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 5405 predicate(UsePopCountInstruction); 5406 match(Set dst (PopCountL src)); 5407 effect(KILL cr, TEMP tmp, TEMP dst); 5408 5409 format %{ "POPCNT $dst, $src.lo\n\t" 5410 "POPCNT $tmp, $src.hi\n\t" 5411 "ADD $dst, $tmp" %} 5412 ins_encode %{ 5413 __ popcntl($dst$$Register, $src$$Register); 5414 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 5415 __ addl($dst$$Register, $tmp$$Register); 5416 %} 5417 ins_pipe(ialu_reg); 5418 %} 5419 5420 // Note: Long.bitCount(long) returns an int. 5421 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ 5422 predicate(UsePopCountInstruction); 5423 match(Set dst (PopCountL (LoadL mem))); 5424 effect(KILL cr, TEMP tmp, TEMP dst); 5425 5426 format %{ "POPCNT $dst, $mem\n\t" 5427 "POPCNT $tmp, $mem+4\n\t" 5428 "ADD $dst, $tmp" %} 5429 ins_encode %{ 5430 //__ popcntl($dst$$Register, $mem$$Address$$first); 5431 //__ popcntl($tmp$$Register, $mem$$Address$$second); 5432 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); 5433 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); 5434 __ addl($dst$$Register, $tmp$$Register); 5435 %} 5436 ins_pipe(ialu_reg); 5437 %} 5438 5439 5440 //----------Load/Store/Move Instructions--------------------------------------- 5441 //----------Load Instructions-------------------------------------------------- 5442 // Load Byte (8bit signed) 5443 instruct loadB(xRegI dst, memory mem) %{ 5444 match(Set dst (LoadB mem)); 5445 5446 ins_cost(125); 5447 format %{ "MOVSX8 $dst,$mem\t# byte" %} 5448 5449 ins_encode %{ 5450 __ movsbl($dst$$Register, $mem$$Address); 5451 %} 5452 5453 ins_pipe(ialu_reg_mem); 5454 %} 5455 5456 // Load Byte (8bit signed) into Long Register 5457 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5458 match(Set dst (ConvI2L (LoadB mem))); 5459 effect(KILL 
cr); 5460 5461 ins_cost(375); 5462 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5463 "MOV $dst.hi,$dst.lo\n\t" 5464 "SAR $dst.hi,7" %} 5465 5466 ins_encode %{ 5467 __ movsbl($dst$$Register, $mem$$Address); 5468 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5469 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5470 %} 5471 5472 ins_pipe(ialu_reg_mem); 5473 %} 5474 5475 // Load Unsigned Byte (8bit UNsigned) 5476 instruct loadUB(xRegI dst, memory mem) %{ 5477 match(Set dst (LoadUB mem)); 5478 5479 ins_cost(125); 5480 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5481 5482 ins_encode %{ 5483 __ movzbl($dst$$Register, $mem$$Address); 5484 %} 5485 5486 ins_pipe(ialu_reg_mem); 5487 %} 5488 5489 // Load Unsigned Byte (8 bit UNsigned) into Long Register 5490 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5491 match(Set dst (ConvI2L (LoadUB mem))); 5492 effect(KILL cr); 5493 5494 ins_cost(250); 5495 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5496 "XOR $dst.hi,$dst.hi" %} 5497 5498 ins_encode %{ 5499 Register Rdst = $dst$$Register; 5500 __ movzbl(Rdst, $mem$$Address); 5501 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5502 %} 5503 5504 ins_pipe(ialu_reg_mem); 5505 %} 5506 5507 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5508 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5509 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5510 effect(KILL cr); 5511 5512 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5513 "XOR $dst.hi,$dst.hi\n\t" 5514 "AND $dst.lo,right_n_bits($mask, 8)" %} 5515 ins_encode %{ 5516 Register Rdst = $dst$$Register; 5517 __ movzbl(Rdst, $mem$$Address); 5518 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5519 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5520 %} 5521 ins_pipe(ialu_reg_mem); 5522 %} 5523 5524 // Load Short (16bit signed) 5525 instruct loadS(rRegI 
dst, memory mem) %{ 5526 match(Set dst (LoadS mem)); 5527 5528 ins_cost(125); 5529 format %{ "MOVSX $dst,$mem\t# short" %} 5530 5531 ins_encode %{ 5532 __ movswl($dst$$Register, $mem$$Address); 5533 %} 5534 5535 ins_pipe(ialu_reg_mem); 5536 %} 5537 5538 // Load Short (16 bit signed) to Byte (8 bit signed) 5539 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5540 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5541 5542 ins_cost(125); 5543 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5544 ins_encode %{ 5545 __ movsbl($dst$$Register, $mem$$Address); 5546 %} 5547 ins_pipe(ialu_reg_mem); 5548 %} 5549 5550 // Load Short (16bit signed) into Long Register 5551 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5552 match(Set dst (ConvI2L (LoadS mem))); 5553 effect(KILL cr); 5554 5555 ins_cost(375); 5556 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5557 "MOV $dst.hi,$dst.lo\n\t" 5558 "SAR $dst.hi,15" %} 5559 5560 ins_encode %{ 5561 __ movswl($dst$$Register, $mem$$Address); 5562 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5563 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5564 %} 5565 5566 ins_pipe(ialu_reg_mem); 5567 %} 5568 5569 // Load Unsigned Short/Char (16bit unsigned) 5570 instruct loadUS(rRegI dst, memory mem) %{ 5571 match(Set dst (LoadUS mem)); 5572 5573 ins_cost(125); 5574 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5575 5576 ins_encode %{ 5577 __ movzwl($dst$$Register, $mem$$Address); 5578 %} 5579 5580 ins_pipe(ialu_reg_mem); 5581 %} 5582 5583 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5584 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5585 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5586 5587 ins_cost(125); 5588 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5589 ins_encode %{ 5590 __ movsbl($dst$$Register, $mem$$Address); 5591 %} 5592 ins_pipe(ialu_reg_mem); 5593 %} 5594 5595 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5596 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5597 match(Set dst (ConvI2L (LoadUS mem))); 5598 effect(KILL cr); 5599 5600 ins_cost(250); 5601 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5602 "XOR $dst.hi,$dst.hi" %} 5603 5604 ins_encode %{ 5605 __ movzwl($dst$$Register, $mem$$Address); 5606 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5607 %} 5608 5609 ins_pipe(ialu_reg_mem); 5610 %} 5611 5612 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5613 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5614 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5615 effect(KILL cr); 5616 5617 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5618 "XOR $dst.hi,$dst.hi" %} 5619 ins_encode %{ 5620 Register Rdst = $dst$$Register; 5621 __ movzbl(Rdst, $mem$$Address); 5622 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5623 %} 5624 ins_pipe(ialu_reg_mem); 5625 %} 5626 5627 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5628 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5629 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5630 effect(KILL cr); 5631 5632 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5633 "XOR $dst.hi,$dst.hi\n\t" 5634 "AND $dst.lo,right_n_bits($mask, 16)" %} 5635 ins_encode %{ 5636 Register Rdst = $dst$$Register; 5637 __ movzwl(Rdst, $mem$$Address); 5638 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5639 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5640 %} 5641 ins_pipe(ialu_reg_mem); 5642 %} 5643 5644 // Load Integer 5645 instruct loadI(rRegI dst, memory mem) %{ 5646 match(Set dst (LoadI mem)); 5647 5648 ins_cost(125); 5649 format %{ "MOV $dst,$mem\t# int" %} 5650 5651 ins_encode %{ 5652 __ movl($dst$$Register, $mem$$Address); 5653 %} 5654 5655 ins_pipe(ialu_reg_mem); 5656 %} 5657 5658 // Load Integer (32 bit signed) to Byte (8 bit signed) 5659 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5660 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5661 5662 ins_cost(125); 5663 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5664 ins_encode %{ 5665 __ movsbl($dst$$Register, $mem$$Address); 5666 %} 5667 ins_pipe(ialu_reg_mem); 5668 %} 5669 5670 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5671 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5672 match(Set dst (AndI (LoadI mem) mask)); 5673 5674 ins_cost(125); 5675 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5676 ins_encode %{ 5677 __ movzbl($dst$$Register, $mem$$Address); 5678 %} 5679 ins_pipe(ialu_reg_mem); 5680 %} 5681 5682 // Load Integer (32 bit signed) to Short (16 bit signed) 5683 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5684 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5685 5686 ins_cost(125); 5687 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5688 ins_encode %{ 5689 __ movswl($dst$$Register, $mem$$Address); 5690 %} 5691 ins_pipe(ialu_reg_mem); 5692 
%} 5693 5694 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5695 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5696 match(Set dst (AndI (LoadI mem) mask)); 5697 5698 ins_cost(125); 5699 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5700 ins_encode %{ 5701 __ movzwl($dst$$Register, $mem$$Address); 5702 %} 5703 ins_pipe(ialu_reg_mem); 5704 %} 5705 5706 // Load Integer into Long Register 5707 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5708 match(Set dst (ConvI2L (LoadI mem))); 5709 effect(KILL cr); 5710 5711 ins_cost(375); 5712 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5713 "MOV $dst.hi,$dst.lo\n\t" 5714 "SAR $dst.hi,31" %} 5715 5716 ins_encode %{ 5717 __ movl($dst$$Register, $mem$$Address); 5718 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5719 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5720 %} 5721 5722 ins_pipe(ialu_reg_mem); 5723 %} 5724 5725 // Load Integer with mask 0xFF into Long Register 5726 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5727 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5728 effect(KILL cr); 5729 5730 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5731 "XOR $dst.hi,$dst.hi" %} 5732 ins_encode %{ 5733 Register Rdst = $dst$$Register; 5734 __ movzbl(Rdst, $mem$$Address); 5735 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5736 %} 5737 ins_pipe(ialu_reg_mem); 5738 %} 5739 5740 // Load Integer with mask 0xFFFF into Long Register 5741 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5742 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5743 effect(KILL cr); 5744 5745 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5746 "XOR $dst.hi,$dst.hi" %} 5747 ins_encode %{ 5748 Register Rdst = $dst$$Register; 5749 __ movzwl(Rdst, $mem$$Address); 5750 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5751 %} 5752 ins_pipe(ialu_reg_mem); 
%}

// Load Integer with 31-bit mask into Long Register.
// The mask guarantees a non-negative result, so the high word is simply zeroed.
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    // HIGH_FROM_LOW maps the low-half register of the long pair to the
    // register holding $dst.hi; zeroing it gives the unsigned extension.
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  // Non-atomic case only; the volatile variants below handle atomic access.
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads: same base address for the low word, +4 for the high.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load via a 64-bit XMM move, bounced through a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load straight into an integer register pair:
// one 64-bit XMM load, then split into lo/hi words via MOVD + shift.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 stack, pre-SSE2)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Same load when the upper half of the XMM register is to be preserved
// (format shows MOVLPD; movdbl emits the appropriate form for this mode).
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 stack, no SSE)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
// One variant per addressing-mode operand so the matcher can pick the
// cheapest encoding for the address expression it folded.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero — XOR is shorter than MOV imm, but clobbers flags.
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);  /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant: two 32-bit immediate moves, one per half.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// One variant per AllocatePrefetchInstr setting; predicates are disjoint.

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte (xRegI restricts src to byte-addressable registers)
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short (0x66 operand-size prefix selects the 16-bit form)
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long (non-atomic): two 32-bit stores, low word then high word.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer: only the low word of the long is stored.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long store from a stack slot via one 64-bit XMM move.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long store from a register pair: pack lo/hi into one
// XMM register with PUNPCKLDQ, then store the 64 bits at once.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87; src must already be on the FP stack top, regDPR1)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors
// On x86 only the StoreLoad barrier needs an actual instruction; the other
// flavors are satisfied by the hardware memory model and emit nothing.

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  // NOTE(review): presumably a preceding store already provides the
  // StoreLoad ordering here, so the barrier elides to nothing.
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// src and dst are both constrained to EAX, so the cast is a pure
// re-typing and emits no code.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move: branch-around emulation for CPUs without CMOV.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note:  a CMoveP is generated for  stubs and native wrappers
//        regardless of whether we are on a P6, so we
//        emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: one CMOV per 32-bit half of the register pair.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct incI_eReg(rRegI dst, immI1 src,
eFlagsReg cr) %{ 7091 predicate(UseIncDec); 7092 match(Set dst (AddI dst src)); 7093 effect(KILL cr); 7094 7095 size(1); 7096 format %{ "INC $dst" %} 7097 opcode(0x40); /* */ 7098 ins_encode( Opc_plus( primary, dst ) ); 7099 ins_pipe( ialu_reg ); 7100 %} 7101 7102 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 7103 match(Set dst (AddI src0 src1)); 7104 ins_cost(110); 7105 7106 format %{ "LEA $dst,[$src0 + $src1]" %} 7107 opcode(0x8D); /* 0x8D /r */ 7108 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7109 ins_pipe( ialu_reg_reg ); 7110 %} 7111 7112 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 7113 match(Set dst (AddP src0 src1)); 7114 ins_cost(110); 7115 7116 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 7117 opcode(0x8D); /* 0x8D /r */ 7118 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7119 ins_pipe( ialu_reg_reg ); 7120 %} 7121 7122 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 7123 predicate(UseIncDec); 7124 match(Set dst (AddI dst src)); 7125 effect(KILL cr); 7126 7127 size(1); 7128 format %{ "DEC $dst" %} 7129 opcode(0x48); /* */ 7130 ins_encode( Opc_plus( primary, dst ) ); 7131 ins_pipe( ialu_reg ); 7132 %} 7133 7134 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 7135 match(Set dst (AddP dst src)); 7136 effect(KILL cr); 7137 7138 size(2); 7139 format %{ "ADD $dst,$src" %} 7140 opcode(0x03); 7141 ins_encode( OpcP, RegReg( dst, src) ); 7142 ins_pipe( ialu_reg_reg ); 7143 %} 7144 7145 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 7146 match(Set dst (AddP dst src)); 7147 effect(KILL cr); 7148 7149 format %{ "ADD $dst,$src" %} 7150 opcode(0x81,0x00); /* Opcode 81 /0 id */ 7151 // ins_encode( RegImm( dst, src) ); 7152 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7153 ins_pipe( ialu_reg ); 7154 %} 7155 7156 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7157 match(Set dst (AddI dst (LoadI src))); 7158 effect(KILL cr); 7159 7160 ins_cost(125); 7161 format %{ "ADD $dst,$src" 
%} 7162 opcode(0x03); 7163 ins_encode( OpcP, RegMem( dst, src) ); 7164 ins_pipe( ialu_reg_mem ); 7165 %} 7166 7167 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7168 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7169 effect(KILL cr); 7170 7171 ins_cost(150); 7172 format %{ "ADD $dst,$src" %} 7173 opcode(0x01); /* Opcode 01 /r */ 7174 ins_encode( OpcP, RegMem( src, dst ) ); 7175 ins_pipe( ialu_mem_reg ); 7176 %} 7177 7178 // Add Memory with Immediate 7179 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7180 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7181 effect(KILL cr); 7182 7183 ins_cost(125); 7184 format %{ "ADD $dst,$src" %} 7185 opcode(0x81); /* Opcode 81 /0 id */ 7186 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7187 ins_pipe( ialu_mem_imm ); 7188 %} 7189 7190 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7191 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7192 effect(KILL cr); 7193 7194 ins_cost(125); 7195 format %{ "INC $dst" %} 7196 opcode(0xFF); /* Opcode FF /0 */ 7197 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7198 ins_pipe( ialu_mem_imm ); 7199 %} 7200 7201 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7202 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7203 effect(KILL cr); 7204 7205 ins_cost(125); 7206 format %{ "DEC $dst" %} 7207 opcode(0xFF); /* Opcode FF /1 */ 7208 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7209 ins_pipe( ialu_mem_imm ); 7210 %} 7211 7212 7213 instruct checkCastPP( eRegP dst ) %{ 7214 match(Set dst (CheckCastPP dst)); 7215 7216 size(0); 7217 format %{ "#checkcastPP of $dst" %} 7218 ins_encode( /*empty encoding*/ ); 7219 ins_pipe( empty ); 7220 %} 7221 7222 instruct castPP( eRegP dst ) %{ 7223 match(Set dst (CastPP dst)); 7224 format %{ "#castPP of $dst" %} 7225 ins_encode( /*empty encoding*/ ); 7226 ins_pipe( empty ); 7227 %} 7228 7229 instruct castII( rRegI dst ) %{ 7230 match(Set dst (CastII dst)); 7231 format %{ "#castII of $dst" %} 
ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}


// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
// Note: only the flags result (cr) is produced; callers test EQ rather
// than consuming a value register.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  // lock_prefix + 0F B1 /r is LOCK CMPXCHG r/m32,r32.
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
// oldval is pinned to EAX (CMPXCHG's implicit compare register) and is
// clobbered by the instruction — hence the KILL below.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
7273 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7274 match(Set cr (StoreLConditional mem (Binary oldval newval))); 7275 effect(KILL oldval); 7276 format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t" 7277 "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t" 7278 "XCHG EBX,ECX" 7279 %} 7280 ins_encode %{ 7281 // Note: we need to swap rbx, and rcx before and after the 7282 // cmpxchg8 instruction because the instruction uses 7283 // rcx as the high order word of the new value to store but 7284 // our register encoding uses rbx. 7285 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7286 if( os::is_MP() ) 7287 __ lock(); 7288 __ cmpxchg8($mem$$Address); 7289 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7290 %} 7291 ins_pipe( pipe_cmpxchg ); 7292 %} 7293 7294 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7295 7296 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7297 predicate(VM_Version::supports_cx8()); 7298 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7299 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); 7300 effect(KILL cr, KILL oldval); 7301 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7302 "MOV $res,0\n\t" 7303 "JNE,s fail\n\t" 7304 "MOV $res,1\n" 7305 "fail:" %} 7306 ins_encode( enc_cmpxchg8(mem_ptr), 7307 enc_flags_ne_to_boolean(res) ); 7308 ins_pipe( pipe_cmpxchg ); 7309 %} 7310 7311 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7312 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7313 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); 7314 effect(KILL cr, KILL oldval); 7315 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7316 "MOV 
$res,0\n\t" 7317 "JNE,s fail\n\t" 7318 "MOV $res,1\n" 7319 "fail:" %} 7320 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7321 ins_pipe( pipe_cmpxchg ); 7322 %} 7323 7324 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7325 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval))); 7326 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval))); 7327 effect(KILL cr, KILL oldval); 7328 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7329 "MOV $res,0\n\t" 7330 "JNE,s fail\n\t" 7331 "MOV $res,1\n" 7332 "fail:" %} 7333 ins_encode( enc_cmpxchgb(mem_ptr), 7334 enc_flags_ne_to_boolean(res) ); 7335 ins_pipe( pipe_cmpxchg ); 7336 %} 7337 7338 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7339 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval))); 7340 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval))); 7341 effect(KILL cr, KILL oldval); 7342 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7343 "MOV $res,0\n\t" 7344 "JNE,s fail\n\t" 7345 "MOV $res,1\n" 7346 "fail:" %} 7347 ins_encode( enc_cmpxchgw(mem_ptr), 7348 enc_flags_ne_to_boolean(res) ); 7349 ins_pipe( pipe_cmpxchg ); 7350 %} 7351 7352 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7353 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7354 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); 7355 effect(KILL cr, KILL oldval); 7356 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7357 "MOV $res,0\n\t" 7358 "JNE,s fail\n\t" 7359 "MOV $res,1\n" 7360 "fail:" %} 7361 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7362 ins_pipe( pipe_cmpxchg ); 7363 %} 7364 7365 instruct compareAndExchangeL( eSIRegP 
mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7366 predicate(VM_Version::supports_cx8()); 7367 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval))); 7368 effect(KILL cr); 7369 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7370 ins_encode( enc_cmpxchg8(mem_ptr) ); 7371 ins_pipe( pipe_cmpxchg ); 7372 %} 7373 7374 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7375 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); 7376 effect(KILL cr); 7377 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7378 ins_encode( enc_cmpxchg(mem_ptr) ); 7379 ins_pipe( pipe_cmpxchg ); 7380 %} 7381 7382 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7383 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval))); 7384 effect(KILL cr); 7385 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7386 ins_encode( enc_cmpxchgb(mem_ptr) ); 7387 ins_pipe( pipe_cmpxchg ); 7388 %} 7389 7390 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7391 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval))); 7392 effect(KILL cr); 7393 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7394 ins_encode( enc_cmpxchgw(mem_ptr) ); 7395 ins_pipe( pipe_cmpxchg ); 7396 %} 7397 7398 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7399 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval))); 7400 effect(KILL cr); 7401 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7402 ins_encode( enc_cmpxchg(mem_ptr) ); 7403 ins_pipe( pipe_cmpxchg ); 7404 %} 7405 7406 instruct xaddB_no_res( 
memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7407 predicate(n->as_LoadStore()->result_not_used()); 7408 match(Set dummy (GetAndAddB mem add)); 7409 effect(KILL cr); 7410 format %{ "ADDB [$mem],$add" %} 7411 ins_encode %{ 7412 if (os::is_MP()) { __ lock(); } 7413 __ addb($mem$$Address, $add$$constant); 7414 %} 7415 ins_pipe( pipe_cmpxchg ); 7416 %} 7417 7418 // Important to match to xRegI: only 8-bit regs. 7419 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{ 7420 match(Set newval (GetAndAddB mem newval)); 7421 effect(KILL cr); 7422 format %{ "XADDB [$mem],$newval" %} 7423 ins_encode %{ 7424 if (os::is_MP()) { __ lock(); } 7425 __ xaddb($mem$$Address, $newval$$Register); 7426 %} 7427 ins_pipe( pipe_cmpxchg ); 7428 %} 7429 7430 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7431 predicate(n->as_LoadStore()->result_not_used()); 7432 match(Set dummy (GetAndAddS mem add)); 7433 effect(KILL cr); 7434 format %{ "ADDS [$mem],$add" %} 7435 ins_encode %{ 7436 if (os::is_MP()) { __ lock(); } 7437 __ addw($mem$$Address, $add$$constant); 7438 %} 7439 ins_pipe( pipe_cmpxchg ); 7440 %} 7441 7442 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{ 7443 match(Set newval (GetAndAddS mem newval)); 7444 effect(KILL cr); 7445 format %{ "XADDS [$mem],$newval" %} 7446 ins_encode %{ 7447 if (os::is_MP()) { __ lock(); } 7448 __ xaddw($mem$$Address, $newval$$Register); 7449 %} 7450 ins_pipe( pipe_cmpxchg ); 7451 %} 7452 7453 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7454 predicate(n->as_LoadStore()->result_not_used()); 7455 match(Set dummy (GetAndAddI mem add)); 7456 effect(KILL cr); 7457 format %{ "ADDL [$mem],$add" %} 7458 ins_encode %{ 7459 if (os::is_MP()) { __ lock(); } 7460 __ addl($mem$$Address, $add$$constant); 7461 %} 7462 ins_pipe( pipe_cmpxchg ); 7463 %} 7464 7465 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7466 match(Set newval (GetAndAddI mem newval)); 7467 effect(KILL 
cr); 7468 format %{ "XADDL [$mem],$newval" %} 7469 ins_encode %{ 7470 if (os::is_MP()) { __ lock(); } 7471 __ xaddl($mem$$Address, $newval$$Register); 7472 %} 7473 ins_pipe( pipe_cmpxchg ); 7474 %} 7475 7476 // Important to match to xRegI: only 8-bit regs. 7477 instruct xchgB( memory mem, xRegI newval) %{ 7478 match(Set newval (GetAndSetB mem newval)); 7479 format %{ "XCHGB $newval,[$mem]" %} 7480 ins_encode %{ 7481 __ xchgb($newval$$Register, $mem$$Address); 7482 %} 7483 ins_pipe( pipe_cmpxchg ); 7484 %} 7485 7486 instruct xchgS( memory mem, rRegI newval) %{ 7487 match(Set newval (GetAndSetS mem newval)); 7488 format %{ "XCHGW $newval,[$mem]" %} 7489 ins_encode %{ 7490 __ xchgw($newval$$Register, $mem$$Address); 7491 %} 7492 ins_pipe( pipe_cmpxchg ); 7493 %} 7494 7495 instruct xchgI( memory mem, rRegI newval) %{ 7496 match(Set newval (GetAndSetI mem newval)); 7497 format %{ "XCHGL $newval,[$mem]" %} 7498 ins_encode %{ 7499 __ xchgl($newval$$Register, $mem$$Address); 7500 %} 7501 ins_pipe( pipe_cmpxchg ); 7502 %} 7503 7504 instruct xchgP( memory mem, pRegP newval) %{ 7505 match(Set newval (GetAndSetP mem newval)); 7506 format %{ "XCHGL $newval,[$mem]" %} 7507 ins_encode %{ 7508 __ xchgl($newval$$Register, $mem$$Address); 7509 %} 7510 ins_pipe( pipe_cmpxchg ); 7511 %} 7512 7513 //----------Subtraction Instructions------------------------------------------- 7514 7515 // Integer Subtraction Instructions 7516 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7517 match(Set dst (SubI dst src)); 7518 effect(KILL cr); 7519 7520 size(2); 7521 format %{ "SUB $dst,$src" %} 7522 opcode(0x2B); 7523 ins_encode( OpcP, RegReg( dst, src) ); 7524 ins_pipe( ialu_reg_reg ); 7525 %} 7526 7527 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7528 match(Set dst (SubI dst src)); 7529 effect(KILL cr); 7530 7531 format %{ "SUB $dst,$src" %} 7532 opcode(0x81,0x05); /* Opcode 81 /5 */ 7533 // ins_encode( RegImm( dst, src) ); 7534 ins_encode( OpcSErm( dst, src ), Con8or32( 
src ) ); 7535 ins_pipe( ialu_reg ); 7536 %} 7537 7538 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7539 match(Set dst (SubI dst (LoadI src))); 7540 effect(KILL cr); 7541 7542 ins_cost(125); 7543 format %{ "SUB $dst,$src" %} 7544 opcode(0x2B); 7545 ins_encode( OpcP, RegMem( dst, src) ); 7546 ins_pipe( ialu_reg_mem ); 7547 %} 7548 7549 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7550 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7551 effect(KILL cr); 7552 7553 ins_cost(150); 7554 format %{ "SUB $dst,$src" %} 7555 opcode(0x29); /* Opcode 29 /r */ 7556 ins_encode( OpcP, RegMem( src, dst ) ); 7557 ins_pipe( ialu_mem_reg ); 7558 %} 7559 7560 // Subtract from a pointer 7561 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{ 7562 match(Set dst (AddP dst (SubI zero src))); 7563 effect(KILL cr); 7564 7565 size(2); 7566 format %{ "SUB $dst,$src" %} 7567 opcode(0x2B); 7568 ins_encode( OpcP, RegReg( dst, src) ); 7569 ins_pipe( ialu_reg_reg ); 7570 %} 7571 7572 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{ 7573 match(Set dst (SubI zero dst)); 7574 effect(KILL cr); 7575 7576 size(2); 7577 format %{ "NEG $dst" %} 7578 opcode(0xF7,0x03); // Opcode F7 /3 7579 ins_encode( OpcP, RegOpc( dst ) ); 7580 ins_pipe( ialu_reg ); 7581 %} 7582 7583 //----------Multiplication/Division Instructions------------------------------- 7584 // Integer Multiplication Instructions 7585 // Multiply Register 7586 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7587 match(Set dst (MulI dst src)); 7588 effect(KILL cr); 7589 7590 size(3); 7591 ins_cost(300); 7592 format %{ "IMUL $dst,$src" %} 7593 opcode(0xAF, 0x0F); 7594 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7595 ins_pipe( ialu_reg_reg_alu0 ); 7596 %} 7597 7598 // Multiply 32-bit Immediate 7599 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7600 match(Set dst (MulI src imm)); 7601 effect(KILL cr); 7602 7603 ins_cost(300); 7604 format %{ "IMUL 
$dst,$src,$imm" %} 7605 opcode(0x69); /* 69 /r id */ 7606 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7607 ins_pipe( ialu_reg_reg_alu0 ); 7608 %} 7609 7610 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7611 match(Set dst src); 7612 effect(KILL cr); 7613 7614 // Note that this is artificially increased to make it more expensive than loadConL 7615 ins_cost(250); 7616 format %{ "MOV EAX,$src\t// low word only" %} 7617 opcode(0xB8); 7618 ins_encode( LdImmL_Lo(dst, src) ); 7619 ins_pipe( ialu_reg_fat ); 7620 %} 7621 7622 // Multiply by 32-bit Immediate, taking the shifted high order results 7623 // (special case for shift by 32) 7624 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7625 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7626 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7627 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7628 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7629 effect(USE src1, KILL cr); 7630 7631 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7632 ins_cost(0*100 + 1*400 - 150); 7633 format %{ "IMUL EDX:EAX,$src1" %} 7634 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7635 ins_pipe( pipe_slow ); 7636 %} 7637 7638 // Multiply by 32-bit Immediate, taking the shifted high order results 7639 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7640 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7641 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7642 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7643 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 
7644 effect(USE src1, KILL cr); 7645 7646 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7647 ins_cost(1*100 + 1*400 - 150); 7648 format %{ "IMUL EDX:EAX,$src1\n\t" 7649 "SAR EDX,$cnt-32" %} 7650 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7651 ins_pipe( pipe_slow ); 7652 %} 7653 7654 // Multiply Memory 32-bit Immediate 7655 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7656 match(Set dst (MulI (LoadI src) imm)); 7657 effect(KILL cr); 7658 7659 ins_cost(300); 7660 format %{ "IMUL $dst,$src,$imm" %} 7661 opcode(0x69); /* 69 /r id */ 7662 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 7663 ins_pipe( ialu_reg_mem_alu0 ); 7664 %} 7665 7666 // Multiply Memory 7667 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7668 match(Set dst (MulI dst (LoadI src))); 7669 effect(KILL cr); 7670 7671 ins_cost(350); 7672 format %{ "IMUL $dst,$src" %} 7673 opcode(0xAF, 0x0F); 7674 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 7675 ins_pipe( ialu_reg_mem_alu0 ); 7676 %} 7677 7678 // Multiply Register Int to Long 7679 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7680 // Basic Idea: long = (long)int * (long)int 7681 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7682 effect(DEF dst, USE src, USE src1, KILL flags); 7683 7684 ins_cost(300); 7685 format %{ "IMUL $dst,$src1" %} 7686 7687 ins_encode( long_int_multiply( dst, src1 ) ); 7688 ins_pipe( ialu_reg_reg_alu0 ); 7689 %} 7690 7691 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7692 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7693 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7694 effect(KILL flags); 7695 7696 ins_cost(300); 7697 format %{ "MUL $dst,$src1" %} 7698 7699 ins_encode( long_uint_multiply(dst, src1) ); 7700 ins_pipe( ialu_reg_reg_alu0 ); 7701 %} 7702 7703 // Multiply 
Register Long 7704 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7705 match(Set dst (MulL dst src)); 7706 effect(KILL cr, TEMP tmp); 7707 ins_cost(4*100+3*400); 7708 // Basic idea: lo(result) = lo(x_lo * y_lo) 7709 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7710 format %{ "MOV $tmp,$src.lo\n\t" 7711 "IMUL $tmp,EDX\n\t" 7712 "MOV EDX,$src.hi\n\t" 7713 "IMUL EDX,EAX\n\t" 7714 "ADD $tmp,EDX\n\t" 7715 "MUL EDX:EAX,$src.lo\n\t" 7716 "ADD EDX,$tmp" %} 7717 ins_encode( long_multiply( dst, src, tmp ) ); 7718 ins_pipe( pipe_slow ); 7719 %} 7720 7721 // Multiply Register Long where the left operand's high 32 bits are zero 7722 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7723 predicate(is_operand_hi32_zero(n->in(1))); 7724 match(Set dst (MulL dst src)); 7725 effect(KILL cr, TEMP tmp); 7726 ins_cost(2*100+2*400); 7727 // Basic idea: lo(result) = lo(x_lo * y_lo) 7728 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7729 format %{ "MOV $tmp,$src.hi\n\t" 7730 "IMUL $tmp,EAX\n\t" 7731 "MUL EDX:EAX,$src.lo\n\t" 7732 "ADD EDX,$tmp" %} 7733 ins_encode %{ 7734 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7735 __ imull($tmp$$Register, rax); 7736 __ mull($src$$Register); 7737 __ addl(rdx, $tmp$$Register); 7738 %} 7739 ins_pipe( pipe_slow ); 7740 %} 7741 7742 // Multiply Register Long where the right operand's high 32 bits are zero 7743 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7744 predicate(is_operand_hi32_zero(n->in(2))); 7745 match(Set dst (MulL dst src)); 7746 effect(KILL cr, TEMP tmp); 7747 ins_cost(2*100+2*400); 7748 // Basic idea: lo(result) = lo(x_lo * y_lo) 7749 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7750 format %{ "MOV $tmp,$src.lo\n\t" 7751 "IMUL $tmp,EDX\n\t" 7752 "MUL EDX:EAX,$src.lo\n\t" 7753 "ADD EDX,$tmp" %} 7754 ins_encode %{ 7755 __ 
movl($tmp$$Register, $src$$Register); 7756 __ imull($tmp$$Register, rdx); 7757 __ mull($src$$Register); 7758 __ addl(rdx, $tmp$$Register); 7759 %} 7760 ins_pipe( pipe_slow ); 7761 %} 7762 7763 // Multiply Register Long where the left and the right operands' high 32 bits are zero 7764 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7765 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7766 match(Set dst (MulL dst src)); 7767 effect(KILL cr); 7768 ins_cost(1*400); 7769 // Basic idea: lo(result) = lo(x_lo * y_lo) 7770 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7771 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7772 ins_encode %{ 7773 __ mull($src$$Register); 7774 %} 7775 ins_pipe( pipe_slow ); 7776 %} 7777 7778 // Multiply Register Long by small constant 7779 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7780 match(Set dst (MulL dst src)); 7781 effect(KILL cr, TEMP tmp); 7782 ins_cost(2*100+2*400); 7783 size(12); 7784 // Basic idea: lo(result) = lo(src * EAX) 7785 // hi(result) = hi(src * EAX) + lo(src * EDX) 7786 format %{ "IMUL $tmp,EDX,$src\n\t" 7787 "MOV EDX,$src\n\t" 7788 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7789 "ADD EDX,$tmp" %} 7790 ins_encode( long_multiply_con( dst, src, tmp ) ); 7791 ins_pipe( pipe_slow ); 7792 %} 7793 7794 // Integer DIV with Register 7795 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7796 match(Set rax (DivI rax div)); 7797 effect(KILL rdx, KILL cr); 7798 size(26); 7799 ins_cost(30*100+10*100); 7800 format %{ "CMP EAX,0x80000000\n\t" 7801 "JNE,s normal\n\t" 7802 "XOR EDX,EDX\n\t" 7803 "CMP ECX,-1\n\t" 7804 "JE,s done\n" 7805 "normal: CDQ\n\t" 7806 "IDIV $div\n\t" 7807 "done:" %} 7808 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7809 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7810 ins_pipe( ialu_reg_reg_alu0 ); 7811 %} 7812 7813 // Divide Register Long 7814 instruct divL_eReg( 
eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 7815 match(Set dst (DivL src1 src2)); 7816 effect( KILL cr, KILL cx, KILL bx ); 7817 ins_cost(10000); 7818 format %{ "PUSH $src1.hi\n\t" 7819 "PUSH $src1.lo\n\t" 7820 "PUSH $src2.hi\n\t" 7821 "PUSH $src2.lo\n\t" 7822 "CALL SharedRuntime::ldiv\n\t" 7823 "ADD ESP,16" %} 7824 ins_encode( long_div(src1,src2) ); 7825 ins_pipe( pipe_slow ); 7826 %} 7827 7828 // Integer DIVMOD with Register, both quotient and mod results 7829 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7830 match(DivModI rax div); 7831 effect(KILL cr); 7832 size(26); 7833 ins_cost(30*100+10*100); 7834 format %{ "CMP EAX,0x80000000\n\t" 7835 "JNE,s normal\n\t" 7836 "XOR EDX,EDX\n\t" 7837 "CMP ECX,-1\n\t" 7838 "JE,s done\n" 7839 "normal: CDQ\n\t" 7840 "IDIV $div\n\t" 7841 "done:" %} 7842 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7843 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7844 ins_pipe( pipe_slow ); 7845 %} 7846 7847 // Integer MOD with Register 7848 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7849 match(Set rdx (ModI rax div)); 7850 effect(KILL rax, KILL cr); 7851 7852 size(26); 7853 ins_cost(300); 7854 format %{ "CDQ\n\t" 7855 "IDIV $div" %} 7856 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7857 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7858 ins_pipe( ialu_reg_reg_alu0 ); 7859 %} 7860 7861 // Remainder Register Long 7862 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 7863 match(Set dst (ModL src1 src2)); 7864 effect( KILL cr, KILL cx, KILL bx ); 7865 ins_cost(10000); 7866 format %{ "PUSH $src1.hi\n\t" 7867 "PUSH $src1.lo\n\t" 7868 "PUSH $src2.hi\n\t" 7869 "PUSH $src2.lo\n\t" 7870 "CALL SharedRuntime::lrem\n\t" 7871 "ADD ESP,16" %} 7872 ins_encode( long_mod(src1,src2) ); 7873 ins_pipe( pipe_slow ); 7874 %} 7875 7876 // Divide Register Long (no special case since divisor != -1) 7877 instruct divL_eReg_imm32( 
eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  // Divide EDX:EAX by a 32-bit immediate using unsigned DIV:
  // negative dividends are negated first and the sign is re-applied at
  // the end (divisor is never 0, -1 or min_jint — see the assert below).
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;  // |imm|: divide by the magnitude
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrough for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      // divisor was negative: negate the 64-bit quotient
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fits into 32 bits)
// Same unsigned-division trick as divL_eReg_imm32; the remainder takes
// the sign of the dividend, so only the hi word needs fixing up.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;  // |imm|
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one (read-modify-write on memory)
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate (read-modify-write on memory)
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable (shift count in CL)
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable (shift count in CL)
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = (~src1) & src2, matched from the (AndI (XorI src1 -1) src2) idiom.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// ANDN with a memory operand for src2.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from (AndI (SubI 0 src) src).
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to lowest set bit, matched from (XorI (AddI src -1) src).
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matched from (AndI (AddI src -1) src).
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer converted to int (CastP2X)
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand — helper instructs used only via expand %{ %} below;
// the rotate matchers rewrite OR-of-shifts patterns into these.
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate: only matches when the two shift amounts sum to 32 (mod 32),
// i.e. the OR-of-shifts really is a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Predicate: shift amounts must sum to 32 (mod 32) for a true rotate.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32,
eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// XOR with all-ones is a bitwise complement; NOT does not touch flags,
// hence no eFlagsReg effect here.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Plain copy, used only as the first half of the Conv2B expands below.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG/ADC pair: leaves 0 if src was 0, 1 otherwise (second half of Conv2B).
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer flavor of the Conv2B expand pair.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// CmpLTMask: dst = (p < q) ? -1 : 0
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    Label done;  // NOTE(review): unused — no jump/bind references it; candidate for removal
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: arithmetic shift smears the sign bit into a mask.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These set the condition register (cr) from an operation so a following
// branch can test the overflow flag; several destroy an input (USE_KILL).

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtraction overflow can use CMP, which sets flags without writing op1.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Negation overflow: matched as OverflowSubI with a zero first operand.
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Immediate form writes the product to a scratch TEMP so neither input dies.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// 64-bit values live in register pairs; ops are lo-half then hi-half with carry.
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long: matched as (SubL 0 dst).
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long ANDN: applied independently to lo and hi halves of the pair.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Address of the hi word: same base/index/scale, displacement + 4.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSI: BLSIL the lo word; only if it was all zero (JNZ not taken)
// does the lowest set bit live in the hi word, so BLSIL it too.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the hi word: same base/index/scale, displacement + 4.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSMSK: mask up to the lowest set bit of the 64-bit pair.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
9084 Register Rdst = $dst$$Register; 9085 Register Rsrc = $src$$Register; 9086 __ movl(HIGH_FROM_LOW(Rdst), 0); 9087 __ blsmskl(Rdst, Rsrc); 9088 __ jccb(Assembler::carryClear, done); 9089 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9090 __ bind(done); 9091 %} 9092 9093 ins_pipe(ialu_reg); 9094 %} 9095 9096 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9097 %{ 9098 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 9099 predicate(UseBMI1Instructions); 9100 effect(KILL cr, TEMP dst); 9101 9102 ins_cost(125); 9103 format %{ "MOVL $dst.hi, 0\n\t" 9104 "BLSMSKL $dst.lo, $src\n\t" 9105 "JNC done\n\t" 9106 "BLSMSKL $dst.hi, $src+4\n" 9107 "done:" 9108 %} 9109 9110 ins_encode %{ 9111 Label done; 9112 Register Rdst = $dst$$Register; 9113 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9114 9115 __ movl(HIGH_FROM_LOW(Rdst), 0); 9116 __ blsmskl(Rdst, $src$$Address); 9117 __ jccb(Assembler::carryClear, done); 9118 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 9119 __ bind(done); 9120 %} 9121 9122 ins_pipe(ialu_reg_mem); 9123 %} 9124 9125 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9126 %{ 9127 match(Set dst (AndL (AddL src minus_1) src) ); 9128 predicate(UseBMI1Instructions); 9129 effect(KILL cr, TEMP dst); 9130 9131 format %{ "MOVL $dst.hi, $src.hi\n\t" 9132 "BLSRL $dst.lo, $src.lo\n\t" 9133 "JNC done\n\t" 9134 "BLSRL $dst.hi, $src.hi\n" 9135 "done:" 9136 %} 9137 9138 ins_encode %{ 9139 Label done; 9140 Register Rdst = $dst$$Register; 9141 Register Rsrc = $src$$Register; 9142 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9143 __ blsrl(Rdst, Rsrc); 9144 __ jccb(Assembler::carryClear, done); 9145 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9146 __ bind(done); 9147 %} 9148 9149 ins_pipe(ialu_reg); 9150 %} 9151 9152 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9153 %{ 9154 match(Set dst (AndL 
(AddL (LoadL src) minus_1) (LoadL src) )); 9155 predicate(UseBMI1Instructions); 9156 effect(KILL cr, TEMP dst); 9157 9158 ins_cost(125); 9159 format %{ "MOVL $dst.hi, $src+4\n\t" 9160 "BLSRL $dst.lo, $src\n\t" 9161 "JNC done\n\t" 9162 "BLSRL $dst.hi, $src+4\n" 9163 "done:" 9164 %} 9165 9166 ins_encode %{ 9167 Label done; 9168 Register Rdst = $dst$$Register; 9169 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9170 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9171 __ blsrl(Rdst, $src$$Address); 9172 __ jccb(Assembler::carryClear, done); 9173 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9174 __ bind(done); 9175 %} 9176 9177 ins_pipe(ialu_reg_mem); 9178 %} 9179 9180 // Or Long Register with Register 9181 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9182 match(Set dst (OrL dst src)); 9183 effect(KILL cr); 9184 format %{ "OR $dst.lo,$src.lo\n\t" 9185 "OR $dst.hi,$src.hi" %} 9186 opcode(0x0B,0x0B); 9187 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9188 ins_pipe( ialu_reg_reg_long ); 9189 %} 9190 9191 // Or Long Register with Immediate 9192 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9193 match(Set dst (OrL dst src)); 9194 effect(KILL cr); 9195 format %{ "OR $dst.lo,$src.lo\n\t" 9196 "OR $dst.hi,$src.hi" %} 9197 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9198 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9199 ins_pipe( ialu_reg_long ); 9200 %} 9201 9202 // Or Long Register with Memory 9203 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9204 match(Set dst (OrL dst (LoadL mem))); 9205 effect(KILL cr); 9206 ins_cost(125); 9207 format %{ "OR $dst.lo,$mem\n\t" 9208 "OR $dst.hi,$mem+4" %} 9209 opcode(0x0B,0x0B); 9210 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9211 ins_pipe( ialu_reg_long_mem ); 9212 %} 9213 9214 // Xor Long Register with Register 9215 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9216 
match(Set dst (XorL dst src)); 9217 effect(KILL cr); 9218 format %{ "XOR $dst.lo,$src.lo\n\t" 9219 "XOR $dst.hi,$src.hi" %} 9220 opcode(0x33,0x33); 9221 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9222 ins_pipe( ialu_reg_reg_long ); 9223 %} 9224 9225 // Xor Long Register with Immediate -1 9226 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9227 match(Set dst (XorL dst imm)); 9228 format %{ "NOT $dst.lo\n\t" 9229 "NOT $dst.hi" %} 9230 ins_encode %{ 9231 __ notl($dst$$Register); 9232 __ notl(HIGH_FROM_LOW($dst$$Register)); 9233 %} 9234 ins_pipe( ialu_reg_long ); 9235 %} 9236 9237 // Xor Long Register with Immediate 9238 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9239 match(Set dst (XorL dst src)); 9240 effect(KILL cr); 9241 format %{ "XOR $dst.lo,$src.lo\n\t" 9242 "XOR $dst.hi,$src.hi" %} 9243 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9244 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9245 ins_pipe( ialu_reg_long ); 9246 %} 9247 9248 // Xor Long Register with Memory 9249 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9250 match(Set dst (XorL dst (LoadL mem))); 9251 effect(KILL cr); 9252 ins_cost(125); 9253 format %{ "XOR $dst.lo,$mem\n\t" 9254 "XOR $dst.hi,$mem+4" %} 9255 opcode(0x33,0x33); 9256 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9257 ins_pipe( ialu_reg_long_mem ); 9258 %} 9259 9260 // Shift Left Long by 1 9261 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9262 predicate(UseNewLongLShift); 9263 match(Set dst (LShiftL dst cnt)); 9264 effect(KILL cr); 9265 ins_cost(100); 9266 format %{ "ADD $dst.lo,$dst.lo\n\t" 9267 "ADC $dst.hi,$dst.hi" %} 9268 ins_encode %{ 9269 __ addl($dst$$Register,$dst$$Register); 9270 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9271 %} 9272 ins_pipe( ialu_reg_long ); 9273 %} 9274 9275 // Shift Left Long by 2 9276 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9277 
predicate(UseNewLongLShift); 9278 match(Set dst (LShiftL dst cnt)); 9279 effect(KILL cr); 9280 ins_cost(100); 9281 format %{ "ADD $dst.lo,$dst.lo\n\t" 9282 "ADC $dst.hi,$dst.hi\n\t" 9283 "ADD $dst.lo,$dst.lo\n\t" 9284 "ADC $dst.hi,$dst.hi" %} 9285 ins_encode %{ 9286 __ addl($dst$$Register,$dst$$Register); 9287 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9288 __ addl($dst$$Register,$dst$$Register); 9289 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9290 %} 9291 ins_pipe( ialu_reg_long ); 9292 %} 9293 9294 // Shift Left Long by 3 9295 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9296 predicate(UseNewLongLShift); 9297 match(Set dst (LShiftL dst cnt)); 9298 effect(KILL cr); 9299 ins_cost(100); 9300 format %{ "ADD $dst.lo,$dst.lo\n\t" 9301 "ADC $dst.hi,$dst.hi\n\t" 9302 "ADD $dst.lo,$dst.lo\n\t" 9303 "ADC $dst.hi,$dst.hi\n\t" 9304 "ADD $dst.lo,$dst.lo\n\t" 9305 "ADC $dst.hi,$dst.hi" %} 9306 ins_encode %{ 9307 __ addl($dst$$Register,$dst$$Register); 9308 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9309 __ addl($dst$$Register,$dst$$Register); 9310 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9311 __ addl($dst$$Register,$dst$$Register); 9312 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9313 %} 9314 ins_pipe( ialu_reg_long ); 9315 %} 9316 9317 // Shift Left Long by 1-31 9318 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9319 match(Set dst (LShiftL dst cnt)); 9320 effect(KILL cr); 9321 ins_cost(200); 9322 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9323 "SHL $dst.lo,$cnt" %} 9324 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9325 ins_encode( move_long_small_shift(dst,cnt) ); 9326 ins_pipe( ialu_reg_long ); 9327 %} 9328 9329 // Shift Left Long by 32-63 9330 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9331 match(Set dst (LShiftL dst cnt)); 9332 effect(KILL cr); 9333 ins_cost(300); 9334 
format %{ "MOV $dst.hi,$dst.lo\n" 9335 "\tSHL $dst.hi,$cnt-32\n" 9336 "\tXOR $dst.lo,$dst.lo" %} 9337 opcode(0xC1, 0x4); /* C1 /4 ib */ 9338 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9339 ins_pipe( ialu_reg_long ); 9340 %} 9341 9342 // Shift Left Long by variable 9343 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9344 match(Set dst (LShiftL dst shift)); 9345 effect(KILL cr); 9346 ins_cost(500+200); 9347 size(17); 9348 format %{ "TEST $shift,32\n\t" 9349 "JEQ,s small\n\t" 9350 "MOV $dst.hi,$dst.lo\n\t" 9351 "XOR $dst.lo,$dst.lo\n" 9352 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9353 "SHL $dst.lo,$shift" %} 9354 ins_encode( shift_left_long( dst, shift ) ); 9355 ins_pipe( pipe_slow ); 9356 %} 9357 9358 // Shift Right Long by 1-31 9359 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9360 match(Set dst (URShiftL dst cnt)); 9361 effect(KILL cr); 9362 ins_cost(200); 9363 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9364 "SHR $dst.hi,$cnt" %} 9365 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9366 ins_encode( move_long_small_shift(dst,cnt) ); 9367 ins_pipe( ialu_reg_long ); 9368 %} 9369 9370 // Shift Right Long by 32-63 9371 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9372 match(Set dst (URShiftL dst cnt)); 9373 effect(KILL cr); 9374 ins_cost(300); 9375 format %{ "MOV $dst.lo,$dst.hi\n" 9376 "\tSHR $dst.lo,$cnt-32\n" 9377 "\tXOR $dst.hi,$dst.hi" %} 9378 opcode(0xC1, 0x5); /* C1 /5 ib */ 9379 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9380 ins_pipe( ialu_reg_long ); 9381 %} 9382 9383 // Shift Right Long by variable 9384 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9385 match(Set dst (URShiftL dst shift)); 9386 effect(KILL cr); 9387 ins_cost(600); 9388 size(17); 9389 format %{ "TEST $shift,32\n\t" 9390 "JEQ,s small\n\t" 9391 "MOV $dst.lo,$dst.hi\n\t" 9392 "XOR $dst.hi,$dst.hi\n" 9393 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9394 "SHR $dst.hi,$shift" %} 9395 ins_encode( 
shift_right_long( dst, shift ) ); 9396 ins_pipe( pipe_slow ); 9397 %} 9398 9399 // Shift Right Long by 1-31 9400 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9401 match(Set dst (RShiftL dst cnt)); 9402 effect(KILL cr); 9403 ins_cost(200); 9404 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9405 "SAR $dst.hi,$cnt" %} 9406 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9407 ins_encode( move_long_small_shift(dst,cnt) ); 9408 ins_pipe( ialu_reg_long ); 9409 %} 9410 9411 // Shift Right Long by 32-63 9412 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9413 match(Set dst (RShiftL dst cnt)); 9414 effect(KILL cr); 9415 ins_cost(300); 9416 format %{ "MOV $dst.lo,$dst.hi\n" 9417 "\tSAR $dst.lo,$cnt-32\n" 9418 "\tSAR $dst.hi,31" %} 9419 opcode(0xC1, 0x7); /* C1 /7 ib */ 9420 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9421 ins_pipe( ialu_reg_long ); 9422 %} 9423 9424 // Shift Right arithmetic Long by variable 9425 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9426 match(Set dst (RShiftL dst shift)); 9427 effect(KILL cr); 9428 ins_cost(600); 9429 size(18); 9430 format %{ "TEST $shift,32\n\t" 9431 "JEQ,s small\n\t" 9432 "MOV $dst.lo,$dst.hi\n\t" 9433 "SAR $dst.hi,31\n" 9434 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9435 "SAR $dst.hi,$shift" %} 9436 ins_encode( shift_right_arith_long( dst, shift ) ); 9437 ins_pipe( pipe_slow ); 9438 %} 9439 9440 9441 //----------Double Instructions------------------------------------------------ 9442 // Double Math 9443 9444 // Compare & branch 9445 9446 // P6 version of float compare, sets condition codes in EFLAGS 9447 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9448 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9449 match(Set cr (CmpD src1 src2)); 9450 effect(KILL rax); 9451 ins_cost(150); 9452 format %{ "FLD $src1\n\t" 9453 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9454 "JNP exit\n\t" 9455 "MOV ah,1 // saw a NaN, set CF\n\t" 9456 
"SAHF\n" 9457 "exit:\tNOP // avoid branch to branch" %} 9458 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9459 ins_encode( Push_Reg_DPR(src1), 9460 OpcP, RegOpc(src2), 9461 cmpF_P6_fixup ); 9462 ins_pipe( pipe_slow ); 9463 %} 9464 9465 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9466 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9467 match(Set cr (CmpD src1 src2)); 9468 ins_cost(150); 9469 format %{ "FLD $src1\n\t" 9470 "FUCOMIP ST,$src2 // P6 instruction" %} 9471 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9472 ins_encode( Push_Reg_DPR(src1), 9473 OpcP, RegOpc(src2)); 9474 ins_pipe( pipe_slow ); 9475 %} 9476 9477 // Compare & branch 9478 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9479 predicate(UseSSE<=1); 9480 match(Set cr (CmpD src1 src2)); 9481 effect(KILL rax); 9482 ins_cost(200); 9483 format %{ "FLD $src1\n\t" 9484 "FCOMp $src2\n\t" 9485 "FNSTSW AX\n\t" 9486 "TEST AX,0x400\n\t" 9487 "JZ,s flags\n\t" 9488 "MOV AH,1\t# unordered treat as LT\n" 9489 "flags:\tSAHF" %} 9490 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9491 ins_encode( Push_Reg_DPR(src1), 9492 OpcP, RegOpc(src2), 9493 fpu_flags); 9494 ins_pipe( pipe_slow ); 9495 %} 9496 9497 // Compare vs zero into -1,0,1 9498 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9499 predicate(UseSSE<=1); 9500 match(Set dst (CmpD3 src1 zero)); 9501 effect(KILL cr, KILL rax); 9502 ins_cost(280); 9503 format %{ "FTSTD $dst,$src1" %} 9504 opcode(0xE4, 0xD9); 9505 ins_encode( Push_Reg_DPR(src1), 9506 OpcS, OpcP, PopFPU, 9507 CmpF_Result(dst)); 9508 ins_pipe( pipe_slow ); 9509 %} 9510 9511 // Compare into -1,0,1 9512 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9513 predicate(UseSSE<=1); 9514 match(Set dst (CmpD3 src1 src2)); 9515 effect(KILL cr, KILL rax); 9516 ins_cost(300); 9517 format %{ "FCMPD $dst,$src1,$src2" %} 9518 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9519 ins_encode( 
Push_Reg_DPR(src1), 9520 OpcP, RegOpc(src2), 9521 CmpF_Result(dst)); 9522 ins_pipe( pipe_slow ); 9523 %} 9524 9525 // float compare and set condition codes in EFLAGS by XMM regs 9526 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9527 predicate(UseSSE>=2); 9528 match(Set cr (CmpD src1 src2)); 9529 ins_cost(145); 9530 format %{ "UCOMISD $src1,$src2\n\t" 9531 "JNP,s exit\n\t" 9532 "PUSHF\t# saw NaN, set CF\n\t" 9533 "AND [rsp], #0xffffff2b\n\t" 9534 "POPF\n" 9535 "exit:" %} 9536 ins_encode %{ 9537 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9538 emit_cmpfp_fixup(_masm); 9539 %} 9540 ins_pipe( pipe_slow ); 9541 %} 9542 9543 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9544 predicate(UseSSE>=2); 9545 match(Set cr (CmpD src1 src2)); 9546 ins_cost(100); 9547 format %{ "UCOMISD $src1,$src2" %} 9548 ins_encode %{ 9549 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9550 %} 9551 ins_pipe( pipe_slow ); 9552 %} 9553 9554 // float compare and set condition codes in EFLAGS by XMM regs 9555 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9556 predicate(UseSSE>=2); 9557 match(Set cr (CmpD src1 (LoadD src2))); 9558 ins_cost(145); 9559 format %{ "UCOMISD $src1,$src2\n\t" 9560 "JNP,s exit\n\t" 9561 "PUSHF\t# saw NaN, set CF\n\t" 9562 "AND [rsp], #0xffffff2b\n\t" 9563 "POPF\n" 9564 "exit:" %} 9565 ins_encode %{ 9566 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9567 emit_cmpfp_fixup(_masm); 9568 %} 9569 ins_pipe( pipe_slow ); 9570 %} 9571 9572 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9573 predicate(UseSSE>=2); 9574 match(Set cr (CmpD src1 (LoadD src2))); 9575 ins_cost(100); 9576 format %{ "UCOMISD $src1,$src2" %} 9577 ins_encode %{ 9578 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9579 %} 9580 ins_pipe( pipe_slow ); 9581 %} 9582 9583 // Compare into -1,0,1 in XMM 9584 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9585 predicate(UseSSE>=2); 9586 match(Set dst (CmpD3 src1 src2)); 
9587 effect(KILL cr); 9588 ins_cost(255); 9589 format %{ "UCOMISD $src1, $src2\n\t" 9590 "MOV $dst, #-1\n\t" 9591 "JP,s done\n\t" 9592 "JB,s done\n\t" 9593 "SETNE $dst\n\t" 9594 "MOVZB $dst, $dst\n" 9595 "done:" %} 9596 ins_encode %{ 9597 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9598 emit_cmpfp3(_masm, $dst$$Register); 9599 %} 9600 ins_pipe( pipe_slow ); 9601 %} 9602 9603 // Compare into -1,0,1 in XMM and memory 9604 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9605 predicate(UseSSE>=2); 9606 match(Set dst (CmpD3 src1 (LoadD src2))); 9607 effect(KILL cr); 9608 ins_cost(275); 9609 format %{ "UCOMISD $src1, $src2\n\t" 9610 "MOV $dst, #-1\n\t" 9611 "JP,s done\n\t" 9612 "JB,s done\n\t" 9613 "SETNE $dst\n\t" 9614 "MOVZB $dst, $dst\n" 9615 "done:" %} 9616 ins_encode %{ 9617 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9618 emit_cmpfp3(_masm, $dst$$Register); 9619 %} 9620 ins_pipe( pipe_slow ); 9621 %} 9622 9623 9624 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9625 predicate (UseSSE <=1); 9626 match(Set dst (SubD dst src)); 9627 9628 format %{ "FLD $src\n\t" 9629 "DSUBp $dst,ST" %} 9630 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9631 ins_cost(150); 9632 ins_encode( Push_Reg_DPR(src), 9633 OpcP, RegOpc(dst) ); 9634 ins_pipe( fpu_reg_reg ); 9635 %} 9636 9637 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9638 predicate (UseSSE <=1); 9639 match(Set dst (RoundDouble (SubD src1 src2))); 9640 ins_cost(250); 9641 9642 format %{ "FLD $src2\n\t" 9643 "DSUB ST,$src1\n\t" 9644 "FSTP_D $dst\t# D-round" %} 9645 opcode(0xD8, 0x5); 9646 ins_encode( Push_Reg_DPR(src2), 9647 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9648 ins_pipe( fpu_mem_reg_reg ); 9649 %} 9650 9651 9652 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9653 predicate (UseSSE <=1); 9654 match(Set dst (SubD dst (LoadD src))); 9655 ins_cost(150); 9656 9657 format %{ "FLD $src\n\t" 9658 "DSUBp $dst,ST" %} 9659 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9660 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9661 OpcP, RegOpc(dst) ); 9662 ins_pipe( fpu_reg_mem ); 9663 %} 9664 9665 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9666 predicate (UseSSE<=1); 9667 match(Set dst (AbsD src)); 9668 ins_cost(100); 9669 format %{ "FABS" %} 9670 opcode(0xE1, 0xD9); 9671 ins_encode( OpcS, OpcP ); 9672 ins_pipe( fpu_reg_reg ); 9673 %} 9674 9675 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9676 predicate(UseSSE<=1); 9677 match(Set dst (NegD src)); 9678 ins_cost(100); 9679 format %{ "FCHS" %} 9680 opcode(0xE0, 0xD9); 9681 ins_encode( OpcS, OpcP ); 9682 ins_pipe( fpu_reg_reg ); 9683 %} 9684 9685 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9686 predicate(UseSSE<=1); 9687 match(Set dst (AddD dst src)); 9688 format %{ "FLD $src\n\t" 9689 "DADD $dst,ST" %} 9690 size(4); 9691 ins_cost(150); 9692 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9693 ins_encode( Push_Reg_DPR(src), 9694 OpcP, RegOpc(dst) ); 9695 ins_pipe( fpu_reg_reg ); 9696 %} 9697 9698 9699 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9700 predicate(UseSSE<=1); 9701 match(Set dst (RoundDouble (AddD src1 src2))); 9702 ins_cost(250); 9703 9704 format %{ "FLD $src2\n\t" 9705 "DADD ST,$src1\n\t" 9706 "FSTP_D $dst\t# D-round" %} 9707 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9708 ins_encode( Push_Reg_DPR(src2), 9709 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9710 ins_pipe( fpu_mem_reg_reg ); 9711 %} 9712 9713 9714 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9715 predicate(UseSSE<=1); 9716 match(Set dst (AddD dst (LoadD src))); 9717 ins_cost(150); 9718 9719 format %{ "FLD $src\n\t" 9720 "DADDp $dst,ST" %} 9721 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9722 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9723 OpcP, RegOpc(dst) ); 9724 ins_pipe( fpu_reg_mem ); 9725 %} 9726 9727 // add-to-memory 9728 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9729 predicate(UseSSE<=1); 9730 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9731 ins_cost(150); 9732 9733 format %{ "FLD_D $dst\n\t" 9734 "DADD ST,$src\n\t" 9735 "FST_D $dst" %} 9736 opcode(0xDD, 0x0); 9737 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9738 Opcode(0xD8), RegOpc(src), 9739 set_instruction_start, 9740 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9741 ins_pipe( fpu_reg_mem ); 9742 %} 9743 9744 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9745 predicate(UseSSE<=1); 9746 match(Set dst (AddD dst con)); 9747 ins_cost(125); 9748 format %{ "FLD1\n\t" 9749 "DADDp $dst,ST" %} 9750 ins_encode %{ 9751 __ fld1(); 9752 __ faddp($dst$$reg); 9753 %} 9754 ins_pipe(fpu_reg); 9755 %} 9756 9757 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9758 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9759 match(Set dst (AddD dst con)); 9760 ins_cost(200); 9761 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9762 "DADDp $dst,ST" %} 9763 ins_encode %{ 9764 __ fld_d($constantaddress($con)); 9765 __ faddp($dst$$reg); 9766 %} 9767 ins_pipe(fpu_reg_mem); 9768 %} 9769 9770 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9771 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9772 match(Set dst (RoundDouble (AddD src con))); 9773 ins_cost(200); 9774 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9775 "DADD ST,$src\n\t" 9776 "FSTP_D $dst\t# D-round" %} 9777 ins_encode %{ 9778 __ fld_d($constantaddress($con)); 9779 __ fadd($src$$reg); 9780 __ fstp_d(Address(rsp, $dst$$disp)); 9781 %} 9782 ins_pipe(fpu_mem_reg_con); 9783 %} 9784 9785 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9786 predicate(UseSSE<=1); 9787 match(Set dst (MulD dst src)); 9788 format %{ "FLD $src\n\t" 9789 "DMULp $dst,ST" %} 9790 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9791 ins_cost(150); 9792 ins_encode( Push_Reg_DPR(src), 9793 OpcP, RegOpc(dst) ); 9794 ins_pipe( 
fpu_reg_reg ); 9795 %} 9796 9797 // Strict FP instruction biases argument before multiply then 9798 // biases result to avoid double rounding of subnormals. 9799 // 9800 // scale arg1 by multiplying arg1 by 2^(-15360) 9801 // load arg2 9802 // multiply scaled arg1 by arg2 9803 // rescale product by 2^(15360) 9804 // 9805 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9806 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9807 match(Set dst (MulD dst src)); 9808 ins_cost(1); // Select this instruction for all strict FP double multiplies 9809 9810 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9811 "DMULp $dst,ST\n\t" 9812 "FLD $src\n\t" 9813 "DMULp $dst,ST\n\t" 9814 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9815 "DMULp $dst,ST\n\t" %} 9816 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9817 ins_encode( strictfp_bias1(dst), 9818 Push_Reg_DPR(src), 9819 OpcP, RegOpc(dst), 9820 strictfp_bias2(dst) ); 9821 ins_pipe( fpu_reg_reg ); 9822 %} 9823 9824 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9825 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9826 match(Set dst (MulD dst con)); 9827 ins_cost(200); 9828 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9829 "DMULp $dst,ST" %} 9830 ins_encode %{ 9831 __ fld_d($constantaddress($con)); 9832 __ fmulp($dst$$reg); 9833 %} 9834 ins_pipe(fpu_reg_mem); 9835 %} 9836 9837 9838 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9839 predicate( UseSSE<=1 ); 9840 match(Set dst (MulD dst (LoadD src))); 9841 ins_cost(200); 9842 format %{ "FLD_D $src\n\t" 9843 "DMULp $dst,ST" %} 9844 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9845 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9846 OpcP, RegOpc(dst) ); 9847 ins_pipe( fpu_reg_mem ); 9848 %} 9849 9850 // 9851 // Cisc-alternate to reg-reg multiply 9852 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9853 predicate( UseSSE<=1 ); 9854 match(Set dst (MulD src (LoadD mem))); 9855 ins_cost(250); 9856 format %{ "FLD_D $mem\n\t" 9857 "DMUL ST,$src\n\t" 9858 "FSTP_D $dst" %} 9859 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9860 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9861 OpcReg_FPR(src), 9862 Pop_Reg_DPR(dst) ); 9863 ins_pipe( fpu_reg_reg_mem ); 9864 %} 9865 9866 9867 // MACRO3 -- addDPR a mulDPR 9868 // This instruction is a '2-address' instruction in that the result goes 9869 // back to src2. This eliminates a move from the macro; possibly the 9870 // register allocator will have to add it back (and maybe not). 9871 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9872 predicate( UseSSE<=1 ); 9873 match(Set src2 (AddD (MulD src0 src1) src2)); 9874 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9875 "DMUL ST,$src1\n\t" 9876 "DADDp $src2,ST" %} 9877 ins_cost(250); 9878 opcode(0xDD); /* LoadD DD /0 */ 9879 ins_encode( Push_Reg_FPR(src0), 9880 FMul_ST_reg(src1), 9881 FAddP_reg_ST(src2) ); 9882 ins_pipe( fpu_reg_reg_reg ); 9883 %} 9884 9885 9886 // MACRO3 -- subDPR a mulDPR 9887 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9888 predicate( UseSSE<=1 ); 9889 match(Set src2 (SubD (MulD src0 src1) src2)); 9890 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9891 "DMUL ST,$src1\n\t" 9892 "DSUBRp $src2,ST" %} 9893 ins_cost(250); 9894 ins_encode( Push_Reg_FPR(src0), 9895 FMul_ST_reg(src1), 9896 Opcode(0xDE), Opc_plus(0xE0,src2)); 9897 ins_pipe( fpu_reg_reg_reg ); 9898 %} 9899 9900 9901 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9902 predicate( UseSSE<=1 ); 9903 match(Set dst (DivD dst src)); 9904 9905 format %{ "FLD $src\n\t" 9906 "FDIVp $dst,ST" %} 9907 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9908 ins_cost(150); 9909 ins_encode( Push_Reg_DPR(src), 9910 OpcP, RegOpc(dst) ); 9911 ins_pipe( fpu_reg_reg ); 9912 %} 9913 9914 // Strict FP instruction biases argument before division then 9915 // biases 
result, to avoid double rounding of subnormals. 9916 // 9917 // scale dividend by multiplying dividend by 2^(-15360) 9918 // load divisor 9919 // divide scaled dividend by divisor 9920 // rescale quotient by 2^(15360) 9921 // 9922 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9923 predicate (UseSSE<=1); 9924 match(Set dst (DivD dst src)); 9925 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9926 ins_cost(01); 9927 9928 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9929 "DMULp $dst,ST\n\t" 9930 "FLD $src\n\t" 9931 "FDIVp $dst,ST\n\t" 9932 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9933 "DMULp $dst,ST\n\t" %} 9934 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9935 ins_encode( strictfp_bias1(dst), 9936 Push_Reg_DPR(src), 9937 OpcP, RegOpc(dst), 9938 strictfp_bias2(dst) ); 9939 ins_pipe( fpu_reg_reg ); 9940 %} 9941 9942 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9943 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 9944 match(Set dst (RoundDouble (DivD src1 src2))); 9945 9946 format %{ "FLD $src1\n\t" 9947 "FDIV ST,$src2\n\t" 9948 "FSTP_D $dst\t# D-round" %} 9949 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 9950 ins_encode( Push_Reg_DPR(src1), 9951 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); 9952 ins_pipe( fpu_mem_reg_reg ); 9953 %} 9954 9955 9956 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 9957 predicate(UseSSE<=1); 9958 match(Set dst (ModD dst src)); 9959 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 9960 9961 format %{ "DMOD $dst,$src" %} 9962 ins_cost(250); 9963 ins_encode(Push_Reg_Mod_DPR(dst, src), 9964 emitModDPR(), 9965 Push_Result_Mod_DPR(src), 9966 Pop_Reg_DPR(dst)); 9967 ins_pipe( pipe_slow ); 9968 %} 9969 9970 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 9971 predicate(UseSSE>=2); 9972 match(Set dst (ModD src0 src1)); 
9973 effect(KILL rax, KILL cr); 9974 9975 format %{ "SUB ESP,8\t # DMOD\n" 9976 "\tMOVSD [ESP+0],$src1\n" 9977 "\tFLD_D [ESP+0]\n" 9978 "\tMOVSD [ESP+0],$src0\n" 9979 "\tFLD_D [ESP+0]\n" 9980 "loop:\tFPREM\n" 9981 "\tFWAIT\n" 9982 "\tFNSTSW AX\n" 9983 "\tSAHF\n" 9984 "\tJP loop\n" 9985 "\tFSTP_D [ESP+0]\n" 9986 "\tMOVSD $dst,[ESP+0]\n" 9987 "\tADD ESP,8\n" 9988 "\tFSTP ST0\t # Restore FPU Stack" 9989 %} 9990 ins_cost(250); 9991 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9992 ins_pipe( pipe_slow ); 9993 %} 9994 9995 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9996 predicate (UseSSE<=1); 9997 match(Set dst(AtanD dst src)); 9998 format %{ "DATA $dst,$src" %} 9999 opcode(0xD9, 0xF3); 10000 ins_encode( Push_Reg_DPR(src), 10001 OpcP, OpcS, RegOpc(dst) ); 10002 ins_pipe( pipe_slow ); 10003 %} 10004 10005 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 10006 predicate (UseSSE>=2); 10007 match(Set dst(AtanD dst src)); 10008 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 10009 format %{ "DATA $dst,$src" %} 10010 opcode(0xD9, 0xF3); 10011 ins_encode( Push_SrcD(src), 10012 OpcP, OpcS, Push_ResultD(dst) ); 10013 ins_pipe( pipe_slow ); 10014 %} 10015 10016 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 10017 predicate (UseSSE<=1); 10018 match(Set dst (SqrtD src)); 10019 format %{ "DSQRT $dst,$src" %} 10020 opcode(0xFA, 0xD9); 10021 ins_encode( Push_Reg_DPR(src), 10022 OpcS, OpcP, Pop_Reg_DPR(dst) ); 10023 ins_pipe( pipe_slow ); 10024 %} 10025 10026 //-------------Float Instructions------------------------------- 10027 // Float Math 10028 10029 // Code for float compare: 10030 // fcompp(); 10031 // fwait(); fnstsw_ax(); 10032 // sahf(); 10033 // movl(dst, unordered_result); 10034 // jcc(Assembler::parity, exit); 10035 // movl(dst, less_result); 10036 // jcc(Assembler::below, exit); 10037 // movl(dst, equal_result); 10038 // jcc(Assembler::equal, exit); 10039 // movl(dst, greater_result); 10040 // exit: 10041 
// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax); // NaN fixup writes AH then SAHF (see format)
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same compare for consumers that only read CF (eFlagsRegUCF):
// no NaN fixup sequence, hence cheaper and no EAX kill.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
// Pre-P6 sequence: FNSTSW AX / SAHF, so EAX is clobbered.
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax); // CmpF_Result(dst) materializes -1/0/1 via flags and AX
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm); // rewrite EFLAGS for the unordered (NaN) case
  %}
  ins_pipe( pipe_slow );
%}

// CF-only consumers need no NaN fixup.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// (memory operand folded into the compare)
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register); // -1/0/1 result; NaN maps to -1
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// FABS operates on the top-of-stack register only (regFPR1).
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// FCHS (negate) operates on the top-of-stack register only.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
// Cisc-alternate to addFPR_reg; spill to obtain 24-bit precision.
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
// Constant operand is loaded from the constant table.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
// MACRO3 (see above): fused mul+add; result is written back into src2.
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: round-trip the XMM operands through the x87
// stack and use the FPREM loop (x86 has no SSE remainder instruction).
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all Alpha sorted. Please keep it that way!

// Round a float by storing it through a stack slot (FST_S truncates
// the extended-precision x87 value to 32-bit float).
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round a double by storing it through a stack slot.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// (x87 source, XMM destination; value is rounded through the stack)
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr ); // uses SUB/ADD ESP
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      // not at TOS: load a copy, then pop-store it
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      // already at TOS: store without popping
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Float to double widening on the x87 stack (no rounding needed).
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// XMM float source, x87 double result: pass the value through the stack.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr ); // uses SUB/ADD ESP
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// x87 double -> int. The FIST result 0x80000000 flags overflow/NaN and
// routes to the d2i_wrapper stub for Java-correct corner-case handling.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 is CVTTSD2SI's "integer indefinite" value: overflow or NaN
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // slow path: pass the operand to the stub on the FPU stack
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 double -> long (result in EDX:EAX).
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    // truncate (round-toward-zero) per Java narrowing semantics
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 signals overflow/NaN -> take the wrapper stub
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
// XMM float -> int; 0x80000000 from CVTTSS2SI signals overflow/NaN and
// routes to the shared d2i_wrapper stub.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // slow path: hand the operand to the stub on the FPU stack
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 float -> long (result in EDX:EAX).
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
// XMM float -> long via the x87 stack (result in EDX:EAX).
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // truncate (round-toward-zero) per Java narrowing semantics
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 signals overflow/NaN -> take the wrapper stub
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// int -> x87 double (FILD from a stack slot; always exact).
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// i2d staying entirely in the XMM domain (MOVD + CVTDQ2PD), selected by
// the UseXmmI2D flag.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// i2f for a value already masked to a byte (AndI with 255): the result is
// exactly representable in 24-bit precision, so no rounding step is needed.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Alternate i2f via MOVD + packed converter (selected by -XX:+UseXmmI2F).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy to both halves, arithmetic-shift the high
// half by 31 to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// l2d on x87: push both halves, FILD the 64-bit int, round via FSTP_D.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// l2d with SSE2: no 64-bit int converter in 32-bit SSE2, so go through
// the x87 FILD and bounce the result through memory into an XMM register.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// l2f with SSE: same x87 round-trip, storing in single precision.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// l2f entirely on the x87 stack (result left in a stack slot).
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// l2i is just a copy of the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// The Move*2* instructions below reinterpret bit patterns between int/long
// and float/double without conversion (Float.floatToRawIntBits and friends).
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Register-to-register d2l: MOVD extracts the low word; PSHUFLW swaps the
// word halves in the temp so a second MOVD can extract the high word.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Variant for CPUs where the full-register MOVSD load is slow; keeps the
// upper half of the XMM register (partial update).
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Register-to-register l2d: move both halves into XMM and interleave them.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

// String compare intrinsics: the LL/UU/LU/UL suffix is the encoding of
// (this, other) — L = Latin-1 (byte), U = UTF-16 (char).
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}

// UL is implemented by swapping the operands and using the LU comparator;
// note the reversed register classes for str/cnt versus the LU variant.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}

  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length substring search (cnt2 not a compile-time constant);
// (-1) tells the stub the substring length is in the register.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Search for a single char within a char[].
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// Test a byte[] for any element with the high bit set.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero: TEST reg,reg is shorter than CMP reg,0.
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fold (AndI src con) compared to zero into a single TEST.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
// Set flags from an unsigned int register-register compare.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Unsigned int compare against an immediate.
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare against zero via TEST of the register with itself.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
// The predicate restricts this to loads that carry no relocation info.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// Compare a pointer loaded from memory against zero without a temp
// register: TEST mem,0xFFFFFFFF sets ZF iff the loaded value is zero.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7); /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  // NOTE(review): 0xCC appears to be a placeholder opcode; the actual code
  // is emitted by the min_enc encoding class -- confirm against its definition.
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  // NOTE(review): 0xCC appears to be a placeholder opcode; the actual code
  // is emitted by the max_enc encoding class.
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
//
// Computes: limit = init + stride * ((limit - init + stride - 1) / stride)
// using a 64-bit (EDX:EAX) intermediate so the subtraction cannot overflow.
// The stride is a compile-time constant and is never +/-1 (asserted below).
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      // Negative stride: round toward the limit, then negate the 64-bit
      // value so the division below works on a positive dividend.
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a
// relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// mask version
// These variants match only when the loop body set a vector mask
// (predicate on has_vector_mask_set) and restore it after the back-branch.
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Two-jump form for float compares whose flags may have PF set (unordered):
// for NE both parity and not-equal branch to the target; for EQ a parity
// hit must fall through past the equal branch.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant used when only the flags result is consumed (compare vs NULL);
// EDI need not be zeroed on a hit, so the XOR is suppressed via opcode 0x0.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short-offset version of jmpConUCF2; see the long variant for the
// parity/unordered handling rationale.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in $dst: high halves compared signed, low halves
// compared unsigned (the low 32 bits of a two's-complement long are
// an unsigned quantity).
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// For long-vs-zero, the sign of the high word alone decides LT/GE.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// CMP/SBB pair computes the sign of the full 64-bit difference so that
// signed LT/GE can be read from the flags.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Both halves of the long are moved conditionally on the same flags.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// The UseSSE guard must cover the whole lt||ge disjunction (as in the
// cmovLL/cmovII/cmovPP predicates above); without the parentheses the
// 'ge' arm would match regardless of the UseSSE setting.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Long is zero iff (lo | hi) == 0, computed into a temp to preserve src.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Unsigned variant of cmpL_zero_flags_EQNE: EQ/NE vs zero is sign-agnostic,
// so the same OR-of-halves trick applies.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Shares the signed encoding (long_cmp_flags1): equality does not depend on
// signedness.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Compare 2 longs (EQ/NE flags) and CMOVE a long: one CMOV per 32-bit half.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same with the 64-bit source in memory; the LoadL is folded and each half
// is CMOVed from its own 32-bit slot.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Compare 2 longs (EQ/NE flags) and CMOVE ints; requires hardware CMOV.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source variant: the LoadI is folded into the CMOV.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Compare 2 longs (EQ/NE flags) and CMOVE pointers.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// FIX(review): parentheses added around the BoolTest disjunction in the four
// floating-point predicates below.  '&&' binds tighter than '||', so the
// original 'UseSSEop && test == eq || test == ne' dropped the UseSSE guard
// for the 'ne' arm, letting the x87 and SSE variants compete under the wrong
// UseSSE setting.  Grouping now matches the cmovII/cmovPP rules above.

// Compare 2 longs and CMOVE doubles (x87 register form, UseSSE<=1).
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form, UseSSE>=2).
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 register form, UseSSE==0).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM form, UseSSE>=1).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
// cmpOp_commute is used because the flags were produced with operands swapped.
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs (LE/GT flags) and CMOVE a long: one CMOV per 32-bit half.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant: the LoadL is folded, each half CMOVed separately.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Compare 2 longs (LE/GT flags, commuted condition) and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source variant: the LoadI is folded into the CMOV.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Compare 2 longs (LE/GT flags, commuted condition) and CMOVE pointers.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// FIX(review): parentheses added around the BoolTest disjunction in the four
// floating-point predicates below.  '&&' binds tighter than '||', so the
// original 'UseSSEop && test == le || test == gt' dropped the UseSSE guard
// for the 'gt' arm, letting the x87 and SSE variants compete under the wrong
// UseSSE setting.  Grouping now matches the cmovII/cmovPP rules above.

// Compare 2 longs and CMOVE doubles (x87 register form, UseSSE<=1).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form, UseSSE>=2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 register form, UseSSE==0).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM form, UseSSE>=1).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
// Dynamic (virtual/interface) Java call: EAX is pre-loaded with a sentinel
// oop (-1) that the inline-cache machinery patches.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that is known not to use the x87 FPU: no float-stack bookkeeping.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
// Jump (not call) to the shared rethrow stub; the exception oop arrives in
// the first argument position.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast-path monitor enter when Restricted Transactional Memory is enabled.
// box is USE_KILL (clobbered); tmp/scr/cx1/cx2 are scratch temps.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Fast-path monitor enter, non-RTM variant.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

// Fast-path monitor exit; handles both RTM and non-RTM inside fast_unlock.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  predicate(SafepointMechanism::uses_global_page_poll());
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6);
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}

// Thread-local safepoint poll: test EAX against the per-thread poll word.
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  predicate(SafepointMechanism::uses_thread_local_poll());
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    // The 2-byte 'test eax, [reg]' encoding (opcode 0x85) is assumed by the
    // relocation/size accounting above; guard it explicitly.
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...]
//  );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceeded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active rule: a load that immediately re-reads the slot just stored from the
// same register is redundant; keep only the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.