//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool entry holds a 16-byte-aligned mask inside fp_signmask_pool:
// sign masks clear the sign bit (abs), sign flips toggle it (neg).
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes of reset code emitted immediately before a call:
// 6 bytes for the fldcw when the method runs in 24-bit FPU mode, plus
// 3 bytes for vzeroupper when wide (>16 byte) vectors are in use.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

// Size in bytes of the FFree_Float_Stack_All sequence; filled in when it
// is first emitted (remains -1 until then, see the assert below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M byte assembled from the three fields (mod, reg, r/m).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with the condition-code field OR'd in.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits [ESP + disp] addressing (ModR/M + SIB + 8- or 32-bit displacement)
// after the given opcode byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

   // rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModR/M (and, when required, SIB and displacement) bytes for a
// register/memory operand: [base + index*scale + displace].  index == 0x4
// means "no index"; base == -1 flags an absolute address.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {            // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else {            // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else {    // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {            // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}


// Emit an integer reg-reg move (MOV r32, r/m32); a move to self emits
// nothing at all.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize the three-way FP compare result in dst:
// -1 (less or unordered), 0 (equal), 1 (greater).
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc);  // popl rbp

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  size += 64; // added to support ReservedStackAccess
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Classify an allocator register for spill-copy purposes.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (or format, or just size) a load/store between a register and an
// [ESP + offset] stack slot; returns the accumulated encoding size.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else {                      // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Spill or reload an XMM register to/from [ESP + offset].  An adjacent
// (reg_lo, reg_hi) pair means a 64-bit double move (MOVSD/MOVLPD);
// otherwise a 32-bit float move (MOVSS) is used.  Returns the accumulated
// encoded size in bytes.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    // EVEX compressed displacement can encode more offsets in a single byte
    // than the plain "fits in a signed byte" test used below for SSE/AVX.
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM register to XMM register copy: MOVSD/MOVAPD for an adjacent (double)
// pair, MOVSS/MOVAPS for a single float.  Returns the accumulated encoded
// size in bytes.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Copy a 32-bit general purpose register into an XMM register (MOVD).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  // 4 bytes for the SSE/VEX form, 6 when the EVEX prefix is in use.
  return (UseAVX> 2) ? 6 : 4;
}


// Copy an XMM register into a 32-bit general purpose register (MOVD).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  // 4 bytes for the SSE/VEX form, 6 when the EVEX prefix is in use.
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register copy (MOV r32,r32); always 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 float register to [ESP + offset].  If the source is not
// already FPR1 (top of the FP stack) it is first pushed with FLD and then
// stored with a popping FSTP; otherwise a non-popping FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // NOTE(review): EBX_num / EDX_num are used here only for their encodings
  // (3 and 2), which become the reg field of the ModRM byte emitted by
  // impl_helper: /3 selects the popping store, /2 the non-popping store.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);

// Copy a vector from one stack slot to another.  Small vectors (VecS/VecD)
// go through PUSH/POP pairs; VecX/VecY/VecZ bounce through xmm0, which is
// parked below ESP and restored afterwards.  Returns the emitted size in
// bytes, asserted equal to the up-front calc_size computation.
static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
                                     int dst_offset, uint ireg, outputStream* st) {
  // Pre-compute the expected encoded size; cross-checked against the actual
  // emitted size below.
  int calc_size = 0;
  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
  switch (ireg) {
  case Op_VecS:
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    break;
  case Op_VecD: {
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    // Second PUSH/POP pair works on the upper 4 bytes, so its displacements
    // (and hence their encoded sizes) differ from the first pair's.
    int tmp_src_offset = src_offset + 4;
    int tmp_dst_offset = dst_offset + 4;
    src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
    dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
    calc_size += 3+src_offset_size + 3+dst_offset_size;
    break;
  }
  case Op_VecX:
  case Op_VecY:
  case Op_VecZ:
    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
    break;
  default:
    ShouldNotReachHere();
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX:
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
    assert(size == calc_size, "incorrect size calculation");
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  return calc_size;
}

// Workhorse for spill copies.  With a CodeBuffer it emits the move; with
// cbuf == NULL it either computes the encoded size (do_size) or prints the
// assembly to 'st'.  Dispatches on the register classes of the first (and,
// for 64-bit values, second) source and destination registers.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  // Vector spill copies are fully handled by the shared vec_* helpers.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high word first so the subsequent low-word move does not
      // clobber it.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 ); // FLD ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD ); // FSTP ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode  (*cbuf, 0xDD ); // FST ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                         st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP is two 2-byte instructions; the FST form is a single one.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD ); // FSTP ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
// Print the spill copy (cbuf == NULL, !do_size path of implementation()).
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

// Emit the spill copy into the code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

// Encoded size in bytes (cbuf == NULL, do_size path of implementation()).
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Materialize the address of the lock's stack slot: LEA reg,[ESP+offset].
// Uses an 8-bit displacement form when the offset fits (mod=01), else the
// 32-bit displacement form (mod=10); size() below must agree.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// 7 bytes with a 32-bit displacement, 4 with an 8-bit one (matches emit()).
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: inline-cache check, then NOP padding so the
// verified entry point is patchable (see comment below).
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Fixed size; one byte less under OptoBreakpoint (one fewer NOP emitted).
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ?
11 : 12;
}


//=============================================================================

int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}

// This is UltraSparc specific, true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 1; }

// No CMOVF/CMOVD with SSE/SSE2
const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

bool Matcher::narrow_oop_use_complex_address() {
  // Compressed oops do not exist on 32-bit x86.
  ShouldNotCallThis();
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_oop_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_klass_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Replace the memory operand of 'node' with a "win95-safe" variant so an
// implicit null check does not fault at an unpatchable address.  Walks the
// operand list to find the operand that supplies input edge 'idx'.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt     = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                          // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num   || reg == EDX_num   ) return true;
  if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than code which uses multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  // No divmodL on 32-bit x86.
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    // (AndL x con) with a constant mask whose high word is zero.
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (selects 16-bit operands).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32, 0 — load a register with the constant zero.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    // Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    // normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    // 0xFF is an imm8 that sign-extends to -1, i.e. this tests divisor == -1.
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1729 // Check for 8-bit immediate, and set sign extend bit in opcode 1730 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1731 emit_opcode(cbuf, $primary | 0x02); } 1732 else { // If 32-bit immediate 1733 emit_opcode(cbuf, $primary); 1734 } 1735 // Emit r/m byte with secondary opcode, after primary opcode. 1736 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1737 %} 1738 1739 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1740 // Check for 8-bit immediate, and set sign extend bit in opcode 1741 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1742 $$$emit8$imm$$constant; 1743 } 1744 else { // If 32-bit immediate 1745 // Output immediate 1746 $$$emit32$imm$$constant; 1747 } 1748 %} 1749 1750 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1751 // Emit primary opcode and set sign-extend bit 1752 // Check for 8-bit immediate, and set sign extend bit in opcode 1753 int con = (int)$imm$$constant; // Throw away top bits 1754 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1755 // Emit r/m byte with secondary opcode, after primary opcode. 1756 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1757 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1758 else emit_d32(cbuf,con); 1759 %} 1760 1761 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1762 // Emit primary opcode and set sign-extend bit 1763 // Check for 8-bit immediate, and set sign extend bit in opcode 1764 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1765 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1766 // Emit r/m byte with tertiary opcode, after primary opcode. 
1767 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1768 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1769 else emit_d32(cbuf,con); 1770 %} 1771 1772 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1773 emit_cc(cbuf, $secondary, $dst$$reg ); 1774 %} 1775 1776 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1777 int destlo = $dst$$reg; 1778 int desthi = HIGH_FROM_LOW(destlo); 1779 // bswap lo 1780 emit_opcode(cbuf, 0x0F); 1781 emit_cc(cbuf, 0xC8, destlo); 1782 // bswap hi 1783 emit_opcode(cbuf, 0x0F); 1784 emit_cc(cbuf, 0xC8, desthi); 1785 // xchg lo and hi 1786 emit_opcode(cbuf, 0x87); 1787 emit_rm(cbuf, 0x3, destlo, desthi); 1788 %} 1789 1790 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1791 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1792 %} 1793 1794 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1795 $$$emit8$primary; 1796 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1797 %} 1798 1799 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1800 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1801 emit_d8(cbuf, op >> 8 ); 1802 emit_d8(cbuf, op & 255); 1803 %} 1804 1805 // emulate a CMOV with a conditional branch around a MOV 1806 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1807 // Invert sense of branch from sense of CMOV 1808 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1809 emit_d8( cbuf, $brOffs$$constant ); 1810 %} 1811 1812 enc_class enc_PartialSubtypeCheck( ) %{ 1813 Register Redi = as_Register(EDI_enc); // result register 1814 Register Reax = as_Register(EAX_enc); // super class 1815 Register Recx = as_Register(ECX_enc); // killed 1816 Register Resi = as_Register(ESI_enc); // sub class 1817 Label miss; 1818 1819 MacroAssembler _masm(&cbuf); 1820 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1821 NULL, &miss, 1822 /*set_cond_codes:*/ true); 1823 if ($primary) { 1824 __ xorptr(Redi, Redi); 1825 } 1826 __ bind(miss); 1827 %} 1828 1829 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1830 
MacroAssembler masm(&cbuf); 1831 int start = masm.offset(); 1832 if (UseSSE >= 2) { 1833 if (VerifyFPU) { 1834 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1835 } 1836 } else { 1837 // External c_calling_convention expects the FPU stack to be 'clean'. 1838 // Compiled code leaves it dirty. Do cleanup now. 1839 masm.empty_FPU_stack(); 1840 } 1841 if (sizeof_FFree_Float_Stack_All == -1) { 1842 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1843 } else { 1844 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1845 } 1846 %} 1847 1848 enc_class Verify_FPU_For_Leaf %{ 1849 if( VerifyFPU ) { 1850 MacroAssembler masm(&cbuf); 1851 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1852 } 1853 %} 1854 1855 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1856 // This is the instruction starting address for relocation info. 1857 cbuf.set_insts_mark(); 1858 $$$emit8$primary; 1859 // CALL directly to the runtime 1860 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1861 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1862 1863 if (UseSSE >= 2) { 1864 MacroAssembler _masm(&cbuf); 1865 BasicType rt = tf()->return_type(); 1866 1867 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1868 // A C runtime call where the return value is unused. In SSE2+ 1869 // mode the result needs to be removed from the FPU stack. It's 1870 // likely that this function call could be removed by the 1871 // optimizer if the C function is a pure function. 
1872 __ ffree(0); 1873 } else if (rt == T_FLOAT) { 1874 __ lea(rsp, Address(rsp, -4)); 1875 __ fstp_s(Address(rsp, 0)); 1876 __ movflt(xmm0, Address(rsp, 0)); 1877 __ lea(rsp, Address(rsp, 4)); 1878 } else if (rt == T_DOUBLE) { 1879 __ lea(rsp, Address(rsp, -8)); 1880 __ fstp_d(Address(rsp, 0)); 1881 __ movdbl(xmm0, Address(rsp, 0)); 1882 __ lea(rsp, Address(rsp, 8)); 1883 } 1884 } 1885 %} 1886 1887 1888 enc_class pre_call_resets %{ 1889 // If method sets FPU control word restore it here 1890 debug_only(int off0 = cbuf.insts_size()); 1891 if (ra_->C->in_24_bit_fp_mode()) { 1892 MacroAssembler _masm(&cbuf); 1893 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1894 } 1895 if (ra_->C->max_vector_size() > 16) { 1896 // Clear upper bits of YMM registers when current compiled code uses 1897 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1898 MacroAssembler _masm(&cbuf); 1899 __ vzeroupper(); 1900 } 1901 debug_only(int off1 = cbuf.insts_size()); 1902 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1903 %} 1904 1905 enc_class post_call_FPU %{ 1906 // If method sets FPU control word do it here also 1907 if (Compile::current()->in_24_bit_fp_mode()) { 1908 MacroAssembler masm(&cbuf); 1909 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1910 } 1911 %} 1912 1913 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1914 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1915 // who we intended to call. 1916 cbuf.set_insts_mark(); 1917 $$$emit8$primary; 1918 1919 if (!_method) { 1920 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1921 runtime_call_Relocation::spec(), 1922 RELOC_IMM32); 1923 } else { 1924 int method_index = resolved_method_index(cbuf); 1925 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1926 : static_call_Relocation::spec(method_index); 1927 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1928 rspec, RELOC_DISP32); 1929 // Emit stubs for static call. 1930 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1931 if (stub == NULL) { 1932 ciEnv::current()->record_failure("CodeCache is full"); 1933 return; 1934 } 1935 } 1936 %} 1937 1938 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1939 MacroAssembler _masm(&cbuf); 1940 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1941 %} 1942 1943 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1944 int disp = in_bytes(Method::from_compiled_offset()); 1945 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1946 1947 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1948 cbuf.set_insts_mark(); 1949 $$$emit8$primary; 1950 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1951 emit_d8(cbuf, disp); // Displacement 1952 1953 %} 1954 1955 // Following encoding is no longer used, but may be restored if calling 1956 // convention changes significantly. 
1957 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1958 // 1959 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1960 // // int ic_reg = Matcher::inline_cache_reg(); 1961 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1962 // // int imo_reg = Matcher::interpreter_method_oop_reg(); 1963 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1964 // 1965 // // // Interpreter expects method_oop in EBX, currently a callee-saved register, 1966 // // // so we load it immediately before the call 1967 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop 1968 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1969 // 1970 // // xor rbp,ebp 1971 // emit_opcode(cbuf, 0x33); 1972 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1973 // 1974 // // CALL to interpreter. 1975 // cbuf.set_insts_mark(); 1976 // $$$emit8$primary; 1977 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1978 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1979 // %} 1980 1981 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1982 $$$emit8$primary; 1983 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1984 $$$emit8$shift$$constant; 1985 %} 1986 1987 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1988 // Load immediate does not have a zero or sign extended version 1989 // for 8-bit immediates 1990 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1991 $$$emit32$src$$constant; 1992 %} 1993 1994 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1995 // Load immediate does not have a zero or sign extended version 1996 // for 8-bit immediates 1997 emit_opcode(cbuf, $primary + $dst$$reg); 1998 $$$emit32$src$$constant; 1999 %} 2000 2001 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 2002 // Load immediate does not have a zero or sign extended version 2003 // for 8-bit immediates 2004 int dst_enc = $dst$$reg; 2005 int src_con = $src$$constant & 0x0FFFFFFFFL; 2006 if (src_con == 0) { 2007 // xor dst, dst 
2008 emit_opcode(cbuf, 0x33); 2009 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 2010 } else { 2011 emit_opcode(cbuf, $primary + dst_enc); 2012 emit_d32(cbuf, src_con); 2013 } 2014 %} 2015 2016 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 2017 // Load immediate does not have a zero or sign extended version 2018 // for 8-bit immediates 2019 int dst_enc = $dst$$reg + 2; 2020 int src_con = ((julong)($src$$constant)) >> 32; 2021 if (src_con == 0) { 2022 // xor dst, dst 2023 emit_opcode(cbuf, 0x33); 2024 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 2025 } else { 2026 emit_opcode(cbuf, $primary + dst_enc); 2027 emit_d32(cbuf, src_con); 2028 } 2029 %} 2030 2031 2032 // Encode a reg-reg copy. If it is useless, then empty encoding. 2033 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 2034 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2035 %} 2036 2037 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 2038 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2039 %} 2040 2041 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 2042 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2043 %} 2044 2045 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 2046 $$$emit8$primary; 2047 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2048 %} 2049 2050 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 2051 $$$emit8$secondary; 2052 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2053 %} 2054 2055 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 2056 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2057 %} 2058 2059 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 2060 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2061 %} 2062 2063 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 2064 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); 2065 %} 2066 2067 enc_class Con32 (immI src) %{ // Con32(storeImmI) 2068 // Output immediate 2069 $$$emit32$src$$constant; 2070 %} 2071 2072 enc_class Con32FPR_as_bits(immFPR src) 
%{ // storeF_imm 2073 // Output Float immediate bits 2074 jfloat jf = $src$$constant; 2075 int jf_as_bits = jint_cast( jf ); 2076 emit_d32(cbuf, jf_as_bits); 2077 %} 2078 2079 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 2080 // Output Float immediate bits 2081 jfloat jf = $src$$constant; 2082 int jf_as_bits = jint_cast( jf ); 2083 emit_d32(cbuf, jf_as_bits); 2084 %} 2085 2086 enc_class Con16 (immI src) %{ // Con16(storeImmI) 2087 // Output immediate 2088 $$$emit16$src$$constant; 2089 %} 2090 2091 enc_class Con_d32(immI src) %{ 2092 emit_d32(cbuf,$src$$constant); 2093 %} 2094 2095 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2096 // Output immediate memory reference 2097 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2098 emit_d32(cbuf, 0x00); 2099 %} 2100 2101 enc_class lock_prefix( ) %{ 2102 if( os::is_MP() ) 2103 emit_opcode(cbuf,0xF0); // [Lock] 2104 %} 2105 2106 // Cmp-xchg long value. 2107 // Note: we need to swap rbx, and rcx before and after the 2108 // cmpxchg8 instruction because the instruction uses 2109 // rcx as the high order word of the new value to store but 2110 // our register encoding uses rbx,. 
  // Compare-and-exchange an 8-byte value at [mem_ptr]; EBX/ECX are exchanged
  // before and after because CMPXCHG8B wants the new high word in ECX while
  // our register encoding supplies it in EBX (see the note above).
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // Compare-and-exchange a 32-bit value at [mem_ptr]
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Compare-and-exchange a byte at [mem_ptr]
  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB0);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Compare-and-exchange a 16-bit value at [mem_ptr] (0x66 size prefix)
  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // 16-bit mode
    emit_opcode(cbuf, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize 0/1 in 'res' from the flags: res=0; JNE,s over; res=1
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Long shift by 1..31: double-width shift (0x0F $tertiary = SHLD/SHRD)
  // of one half into the other, then a plain shift of the remaining half.
  // $tertiary == 0xA4 selects the left-shift operand order.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic long shift right by 32..63: move hi into lo, shift lo by
  // cnt-32 when nonzero, then SAR hi by 31 to fill it with the sign.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Long shift by 32..63 with zero fill: move one half into the other,
  // shift by cnt-32 when nonzero, then clear the vacated half with XOR.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  // RM-byte reg/opcode field supplied as an ADLC-time constant; displacement
  // must not need relocation (asserted).
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  // Same as above but forwards the memory operand's relocation type
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;          // Store & pop
      emit_opcode( cbuf, 0xD9 );   // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();         // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free p += (p < q) ? y : 0 via SUB/SBB/AND/ADD
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Variable long left shift: handle shift>=32 by moving lo into hi and
  // clearing lo, then SHLD/SHL by the count (mod 32).
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
  // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Variable long logical shift right: symmetric to shift_left_long,
  // using SHRD/SHR and zero-filling the high half.
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
  // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Variable long arithmetic shift right: like shift_right_long but the
  // high half is sign-filled with SAR 31.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
  // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Multiply dst by the strictfp bias-1 constant (80-bit real from stub)
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Multiply dst by the strictfp bias-2 constant (80-bit real from stub)
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;                      // FST (no pop) when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: pop the copy we pushed
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;                      // FST (no pop) when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: pop the copy we pushed
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  // Load dst to TOS; if src is not FPR1, rotate it into ST(1) via
  // fincstp / FXCH / fdecstp.
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Push src1 then src0 onto the x87 stack, going through an 8-byte stack temp
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Push src1 then src0 onto the x87 stack, going through a 4-byte stack temp
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 TOS double into dst XMM via the stack temp; release the temp
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Pop the x87 TOS float into dst XMM; release d8 bytes of stack temp
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Push an XMM double onto the x87 stack through a fresh 8-byte stack temp
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Reserve 8 bytes of stack scratch space
  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  // Release the 8-byte stack scratch space
  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Copy an XMM double onto the x87 stack via an existing stack temp
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Rotate the result in 'src' into FPR1 (fincstp / FXCH / fdecstp) so a
  // following Pop_Reg_F / Pop_Mem_F can store it.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy FPU status to flags; short-branch over the next 5 bytes unless parity
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  // FPREM loop: repeat until the FPU's C2 flag (reduction complete) clears
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf,
0x66 ); // operand-size prefix for 16-bit immediate 2684 emit_opcode( cbuf, 0xA9 ); 2685 emit_d16 ( cbuf, 0x0400 ); 2686 // // // This sequence works, but stalls for 12-16 cycles on PPro 2687 // // test rax,0x0400 2688 // emit_opcode( cbuf, 0xA9 ); 2689 // emit_d32 ( cbuf, 0x00000400 ); 2690 // 2691 // jz exit (no unordered comparison) 2692 emit_opcode( cbuf, 0x74 ); 2693 emit_d8 ( cbuf, 0x02 ); 2694 // mov ah,1 - treat as LT case (set carry flag) 2695 emit_opcode( cbuf, 0xB4 ); 2696 emit_d8 ( cbuf, 0x01 ); 2697 // sahf 2698 emit_opcode( cbuf, 0x9E); 2699 %} 2700 2701 enc_class cmpF_P6_fixup() %{ 2702 // Fixup the integer flags in case comparison involved a NaN 2703 // 2704 // JNP exit (no unordered comparison, P-flag is set by NaN) 2705 emit_opcode( cbuf, 0x7B ); 2706 emit_d8 ( cbuf, 0x03 ); 2707 // MOV AH,1 - treat as LT case (set carry flag) 2708 emit_opcode( cbuf, 0xB4 ); 2709 emit_d8 ( cbuf, 0x01 ); 2710 // SAHF 2711 emit_opcode( cbuf, 0x9E); 2712 // NOP // target for branch to avoid branch to branch 2713 emit_opcode( cbuf, 0x90); 2714 %} 2715 2716 // fnstsw_ax(); 2717 // sahf(); 2718 // movl(dst, nan_result); 2719 // jcc(Assembler::parity, exit); 2720 // movl(dst, less_result); 2721 // jcc(Assembler::below, exit); 2722 // movl(dst, equal_result); 2723 // jcc(Assembler::equal, exit); 2724 // movl(dst, greater_result); 2725 2726 // less_result = 1; 2727 // greater_result = -1; 2728 // equal_result = 0; 2729 // nan_result = -1; 2730 2731 enc_class CmpF_Result(rRegI dst) %{ 2732 // fnstsw_ax(); 2733 emit_opcode( cbuf, 0xDF); 2734 emit_opcode( cbuf, 0xE0); 2735 // sahf 2736 emit_opcode( cbuf, 0x9E); 2737 // movl(dst, nan_result); 2738 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2739 emit_d32( cbuf, -1 ); 2740 // jcc(Assembler::parity, exit); 2741 emit_opcode( cbuf, 0x7A ); 2742 emit_d8 ( cbuf, 0x13 ); 2743 // movl(dst, less_result); 2744 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2745 emit_d32( cbuf, -1 ); 2746 // jcc(Assembler::below, exit); 2747 emit_opcode( cbuf, 0x72 ); 
2748 emit_d8 ( cbuf, 0x0C ); 2749 // movl(dst, equal_result); 2750 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2751 emit_d32( cbuf, 0 ); 2752 // jcc(Assembler::equal, exit); 2753 emit_opcode( cbuf, 0x74 ); 2754 emit_d8 ( cbuf, 0x05 ); 2755 // movl(dst, greater_result); 2756 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2757 emit_d32( cbuf, 1 ); 2758 %} 2759 2760 2761 // Compare the longs and set flags 2762 // BROKEN! Do Not use as-is 2763 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2764 // CMP $src1.hi,$src2.hi 2765 emit_opcode( cbuf, 0x3B ); 2766 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2767 // JNE,s done 2768 emit_opcode(cbuf,0x75); 2769 emit_d8(cbuf, 2 ); 2770 // CMP $src1.lo,$src2.lo 2771 emit_opcode( cbuf, 0x3B ); 2772 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2773 // done: 2774 %} 2775 2776 enc_class convert_int_long( regL dst, rRegI src ) %{ 2777 // mov $dst.lo,$src 2778 int dst_encoding = $dst$$reg; 2779 int src_encoding = $src$$reg; 2780 encode_Copy( cbuf, dst_encoding , src_encoding ); 2781 // mov $dst.hi,$src 2782 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2783 // sar $dst.hi,31 2784 emit_opcode( cbuf, 0xC1 ); 2785 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2786 emit_d8(cbuf, 0x1F ); 2787 %} 2788 2789 enc_class convert_long_double( eRegL src ) %{ 2790 // push $src.hi 2791 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2792 // push $src.lo 2793 emit_opcode(cbuf, 0x50+$src$$reg ); 2794 // fild 64-bits at [SP] 2795 emit_opcode(cbuf,0xdf); 2796 emit_d8(cbuf, 0x6C); 2797 emit_d8(cbuf, 0x24); 2798 emit_d8(cbuf, 0x00); 2799 // pop stack 2800 emit_opcode(cbuf, 0x83); // add SP, #8 2801 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2802 emit_d8(cbuf, 0x8); 2803 %} 2804 2805 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2806 // IMUL EDX:EAX,$src1 2807 emit_opcode( cbuf, 0xF7 ); 2808 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2809 // SAR 
EDX,$cnt-32 2810 int shift_count = ((int)$cnt$$constant) - 32; 2811 if (shift_count > 0) { 2812 emit_opcode(cbuf, 0xC1); 2813 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2814 emit_d8(cbuf, shift_count); 2815 } 2816 %} 2817 2818 // this version doesn't have add sp, 8 2819 enc_class convert_long_double2( eRegL src ) %{ 2820 // push $src.hi 2821 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2822 // push $src.lo 2823 emit_opcode(cbuf, 0x50+$src$$reg ); 2824 // fild 64-bits at [SP] 2825 emit_opcode(cbuf,0xdf); 2826 emit_d8(cbuf, 0x6C); 2827 emit_d8(cbuf, 0x24); 2828 emit_d8(cbuf, 0x00); 2829 %} 2830 2831 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2832 // Basic idea: long = (long)int * (long)int 2833 // IMUL EDX:EAX, src 2834 emit_opcode( cbuf, 0xF7 ); 2835 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2836 %} 2837 2838 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2839 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2840 // MUL EDX:EAX, src 2841 emit_opcode( cbuf, 0xF7 ); 2842 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2843 %} 2844 2845 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2846 // Basic idea: lo(result) = lo(x_lo * y_lo) 2847 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2848 // MOV $tmp,$src.lo 2849 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2850 // IMUL $tmp,EDX 2851 emit_opcode( cbuf, 0x0F ); 2852 emit_opcode( cbuf, 0xAF ); 2853 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2854 // MOV EDX,$src.hi 2855 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2856 // IMUL EDX,EAX 2857 emit_opcode( cbuf, 0x0F ); 2858 emit_opcode( cbuf, 0xAF ); 2859 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2860 // ADD $tmp,EDX 2861 emit_opcode( cbuf, 0x03 ); 2862 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2863 // MUL EDX:EAX,$src.lo 2864 emit_opcode( cbuf, 0xF7 ); 2865 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2866 // ADD EDX,ESI 2867 emit_opcode( 
cbuf, 0x03 ); 2868 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2869 %} 2870 2871 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2872 // Basic idea: lo(result) = lo(src * y_lo) 2873 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2874 // IMUL $tmp,EDX,$src 2875 emit_opcode( cbuf, 0x6B ); 2876 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2877 emit_d8( cbuf, (int)$src$$constant ); 2878 // MOV EDX,$src 2879 emit_opcode(cbuf, 0xB8 + EDX_enc); 2880 emit_d32( cbuf, (int)$src$$constant ); 2881 // MUL EDX:EAX,EDX 2882 emit_opcode( cbuf, 0xF7 ); 2883 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2884 // ADD EDX,ESI 2885 emit_opcode( cbuf, 0x03 ); 2886 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2887 %} 2888 2889 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2890 // PUSH src1.hi 2891 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2892 // PUSH src1.lo 2893 emit_opcode(cbuf, 0x50+$src1$$reg ); 2894 // PUSH src2.hi 2895 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2896 // PUSH src2.lo 2897 emit_opcode(cbuf, 0x50+$src2$$reg ); 2898 // CALL directly to the runtime 2899 cbuf.set_insts_mark(); 2900 emit_opcode(cbuf,0xE8); // Call into runtime 2901 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2902 // Restore stack 2903 emit_opcode(cbuf, 0x83); // add SP, #framesize 2904 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2905 emit_d8(cbuf, 4*4); 2906 %} 2907 2908 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2909 // PUSH src1.hi 2910 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2911 // PUSH src1.lo 2912 emit_opcode(cbuf, 0x50+$src1$$reg ); 2913 // PUSH src2.hi 2914 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2915 // PUSH src2.lo 2916 emit_opcode(cbuf, 0x50+$src2$$reg ); 2917 // CALL directly to the runtime 2918 cbuf.set_insts_mark(); 2919 emit_opcode(cbuf,0xE8); // Call into runtime 2920 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2921 // Restore stack 2922 emit_opcode(cbuf, 0x83); // add SP, #framesize 2923 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2924 emit_d8(cbuf, 4*4); 2925 %} 2926 2927 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2928 // MOV $tmp,$src.lo 2929 emit_opcode(cbuf, 0x8B); 2930 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2931 // OR $tmp,$src.hi 2932 emit_opcode(cbuf, 0x0B); 2933 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2934 %} 2935 2936 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2937 // CMP $src1.lo,$src2.lo 2938 emit_opcode( cbuf, 0x3B ); 2939 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2940 // JNE,s skip 2941 emit_cc(cbuf, 0x70, 0x5); 2942 emit_d8(cbuf,2); 2943 // CMP $src1.hi,$src2.hi 2944 emit_opcode( cbuf, 0x3B ); 2945 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2946 %} 2947 2948 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2949 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2950 emit_opcode( cbuf, 0x3B ); 2951 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2952 // MOV $tmp,$src1.hi 2953 emit_opcode( cbuf, 0x8B ); 2954 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2955 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2956 emit_opcode( cbuf, 0x1B ); 2957 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2958 %} 2959 2960 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2961 // XOR $tmp,$tmp 2962 emit_opcode(cbuf,0x33); // XOR 2963 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2964 // CMP $tmp,$src.lo 2965 emit_opcode( cbuf, 0x3B ); 2966 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2967 // SBB $tmp,$src.hi 2968 emit_opcode( cbuf, 0x1B ); 2969 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2970 %} 2971 2972 // Sniff, sniff... 
smells like Gnu Superoptimizer 2973 enc_class neg_long( eRegL dst ) %{ 2974 emit_opcode(cbuf,0xF7); // NEG hi 2975 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2976 emit_opcode(cbuf,0xF7); // NEG lo 2977 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2978 emit_opcode(cbuf,0x83); // SBB hi,0 2979 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2980 emit_d8 (cbuf,0 ); 2981 %} 2982 2983 enc_class enc_pop_rdx() %{ 2984 emit_opcode(cbuf,0x5A); 2985 %} 2986 2987 enc_class enc_rethrow() %{ 2988 cbuf.set_insts_mark(); 2989 emit_opcode(cbuf, 0xE9); // jmp entry 2990 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2991 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2992 %} 2993 2994 2995 // Convert a double to an int. Java semantics require we do complex 2996 // manglelations in the corner cases. So we set the rounding mode to 2997 // 'zero', store the darned double down as an int, and reset the 2998 // rounding mode to 'nearest'. The hardware throws an exception which 2999 // patches up the correct value directly to the stack. 3000 enc_class DPR2I_encoding( regDPR src ) %{ 3001 // Flip to round-to-zero mode. We attempted to allow invalid-op 3002 // exceptions here, so that a NAN or other corner-case value will 3003 // thrown an exception (but normal values get converted at full speed). 3004 // However, I2C adapters and other float-stack manglers leave pending 3005 // invalid-op exceptions hanging. We would have to clear them before 3006 // enabling them and that is more expensive than just testing for the 3007 // invalid value Intel stores down in the corner cases. 3008 emit_opcode(cbuf,0xD9); // FLDCW trunc 3009 emit_opcode(cbuf,0x2D); 3010 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3011 // Allocate a word 3012 emit_opcode(cbuf,0x83); // SUB ESP,4 3013 emit_opcode(cbuf,0xEC); 3014 emit_d8(cbuf,0x04); 3015 // Encoding assumes a double has been pushed into FPR0. 
3016 // Store down the double as an int, popping the FPU stack 3017 emit_opcode(cbuf,0xDB); // FISTP [ESP] 3018 emit_opcode(cbuf,0x1C); 3019 emit_d8(cbuf,0x24); 3020 // Restore the rounding mode; mask the exception 3021 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3022 emit_opcode(cbuf,0x2D); 3023 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3024 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 3025 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3026 3027 // Load the converted int; adjust CPU stack 3028 emit_opcode(cbuf,0x58); // POP EAX 3029 emit_opcode(cbuf,0x3D); // CMP EAX,imm 3030 emit_d32 (cbuf,0x80000000); // 0x80000000 3031 emit_opcode(cbuf,0x75); // JNE around_slow_call 3032 emit_d8 (cbuf,0x07); // Size of slow_call 3033 // Push src onto stack slow-path 3034 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3035 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3036 // CALL directly to the runtime 3037 cbuf.set_insts_mark(); 3038 emit_opcode(cbuf,0xE8); // Call into runtime 3039 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3040 // Carry on here... 3041 %} 3042 3043 enc_class DPR2L_encoding( regDPR src ) %{ 3044 emit_opcode(cbuf,0xD9); // FLDCW trunc 3045 emit_opcode(cbuf,0x2D); 3046 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3047 // Allocate a word 3048 emit_opcode(cbuf,0x83); // SUB ESP,8 3049 emit_opcode(cbuf,0xEC); 3050 emit_d8(cbuf,0x08); 3051 // Encoding assumes a double has been pushed into FPR0. 3052 // Store down the double as a long, popping the FPU stack 3053 emit_opcode(cbuf,0xDF); // FISTP [ESP] 3054 emit_opcode(cbuf,0x3C); 3055 emit_d8(cbuf,0x24); 3056 // Restore the rounding mode; mask the exception 3057 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3058 emit_opcode(cbuf,0x2D); 3059 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3060 ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 3061 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3062 3063 // Load the converted int; adjust CPU stack 3064 emit_opcode(cbuf,0x58); // POP EAX 3065 emit_opcode(cbuf,0x5A); // POP EDX 3066 emit_opcode(cbuf,0x81); // CMP EDX,imm 3067 emit_d8 (cbuf,0xFA); // rdx 3068 emit_d32 (cbuf,0x80000000); // 0x80000000 3069 emit_opcode(cbuf,0x75); // JNE around_slow_call 3070 emit_d8 (cbuf,0x07+4); // Size of slow_call 3071 emit_opcode(cbuf,0x85); // TEST EAX,EAX 3072 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 3073 emit_opcode(cbuf,0x75); // JNE around_slow_call 3074 emit_d8 (cbuf,0x07); // Size of slow_call 3075 // Push src onto stack slow-path 3076 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3077 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3078 // CALL directly to the runtime 3079 cbuf.set_insts_mark(); 3080 emit_opcode(cbuf,0xE8); // Call into runtime 3081 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3082 // Carry on here... 
3083 %} 3084 3085 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 3086 // Operand was loaded from memory into fp ST (stack top) 3087 // FMUL ST,$src /* D8 C8+i */ 3088 emit_opcode(cbuf, 0xD8); 3089 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3090 %} 3091 3092 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3093 // FADDP ST,src2 /* D8 C0+i */ 3094 emit_opcode(cbuf, 0xD8); 3095 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3096 //could use FADDP src2,fpST /* DE C0+i */ 3097 %} 3098 3099 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3100 // FADDP src2,ST /* DE C0+i */ 3101 emit_opcode(cbuf, 0xDE); 3102 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3103 %} 3104 3105 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3106 // Operand has been loaded into fp ST (stack top) 3107 // FSUB ST,$src1 3108 emit_opcode(cbuf, 0xD8); 3109 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3110 3111 // FDIV 3112 emit_opcode(cbuf, 0xD8); 3113 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3114 %} 3115 3116 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3117 // Operand was loaded from memory into fp ST (stack top) 3118 // FADD ST,$src /* D8 C0+i */ 3119 emit_opcode(cbuf, 0xD8); 3120 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3121 3122 // FMUL ST,src2 /* D8 C*+i */ 3123 emit_opcode(cbuf, 0xD8); 3124 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3125 %} 3126 3127 3128 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3129 // Operand was loaded from memory into fp ST (stack top) 3130 // FADD ST,$src /* D8 C0+i */ 3131 emit_opcode(cbuf, 0xD8); 3132 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3133 3134 // FMULP src2,ST /* DE C8+i */ 3135 emit_opcode(cbuf, 0xDE); 3136 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3137 %} 3138 3139 // Atomically load the volatile long 3140 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3141 emit_opcode(cbuf,0xDF); 3142 int rm_byte_opcode = 0x05; 3143 int base = $mem$$base; 3144 int index = $mem$$index; 3145 int scale = $mem$$scale; 3146 int displace = $mem$$disp; 3147 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3148 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3149 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3150 %} 3151 3152 // Volatile Store Long. Must be atomic, so move it into 3153 // the FP TOS and then do a 64-bit FIST. Has to probe the 3154 // target address before the store (for null-ptr checks) 3155 // so the memory operand is used twice in the encoding. 3156 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3157 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3158 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3159 emit_opcode(cbuf,0xDF); 3160 int rm_byte_opcode = 0x07; 3161 int base = $mem$$base; 3162 int index = $mem$$index; 3163 int scale = $mem$$scale; 3164 int displace = $mem$$disp; 3165 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3166 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3167 %} 3168 3169 // Safepoint Poll. This polls the safepoint page, and causes an 3170 // exception if it is not readable. Unfortunately, it kills the condition code 3171 // in the process 3172 // We current use TESTL [spp],EDI 3173 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 3174 3175 enc_class Safepoint_Poll() %{ 3176 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); 3177 emit_opcode(cbuf,0x85); 3178 emit_rm (cbuf, 0x0, 0x7, 0x5); 3179 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 3180 %} 3181 %} 3182 3183 3184 //----------FRAME-------------------------------------------------------------- 3185 // Definition of frame structure and management information. 
3186 // 3187 // S T A C K L A Y O U T Allocators stack-slot number 3188 // | (to get allocators register number 3189 // G Owned by | | v add OptoReg::stack0()) 3190 // r CALLER | | 3191 // o | +--------+ pad to even-align allocators stack-slot 3192 // w V | pad0 | numbers; owned by CALLER 3193 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned 3194 // h ^ | in | 5 3195 // | | args | 4 Holes in incoming args owned by SELF 3196 // | | | | 3 3197 // | | +--------+ 3198 // V | | old out| Empty on Intel, window on Sparc 3199 // | old |preserve| Must be even aligned. 3200 // | SP-+--------+----> Matcher::_old_SP, even aligned 3201 // | | in | 3 area for Intel ret address 3202 // Owned by |preserve| Empty on Sparc. 3203 // SELF +--------+ 3204 // | | pad2 | 2 pad to align old SP 3205 // | +--------+ 1 3206 // | | locks | 0 3207 // | +--------+----> OptoReg::stack0(), even aligned 3208 // | | pad1 | 11 pad to align new SP 3209 // | +--------+ 3210 // | | | 10 3211 // | | spills | 9 spills 3212 // V | | 8 (pad0 slot for callee) 3213 // -----------+--------+----> Matcher::_out_arg_limit, unaligned 3214 // ^ | out | 7 3215 // | | args | 6 Holes in outgoing args owned by CALLEE 3216 // Owned by +--------+ 3217 // CALLEE | new out| 6 Empty on Intel, window on Sparc 3218 // | new |preserve| Must be even-aligned. 3219 // | SP-+--------+----> Matcher::_new_SP, even aligned 3220 // | | | 3221 // 3222 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is 3223 // known from SELF's arguments and the Java calling convention. 3224 // Region 6-7 is determined per call site. 3225 // Note 2: If the calling convention leaves holes in the incoming argument 3226 // area, those holes are owned by SELF. Holes in the outgoing area 3227 // are owned by the CALLEE. Holes should not be nessecary in the 3228 // incoming area, as the Java calling convention is completely under 3229 // the control of the AD file. 
Doubles can be sorted and packed to 3230 // avoid holes. Holes in the outgoing arguments may be nessecary for 3231 // varargs C calling conventions. 3232 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is 3233 // even aligned with pad0 as needed. 3234 // Region 6 is even aligned. Region 6-7 is NOT even aligned; 3235 // region 6-11 is even aligned; it may be padded out more so that 3236 // the region from SP to FP meets the minimum stack alignment. 3237 3238 frame %{ 3239 // What direction does stack grow in (assumed to be same for C & Java) 3240 stack_direction(TOWARDS_LOW); 3241 3242 // These three registers define part of the calling convention 3243 // between compiled code and the interpreter. 3244 inline_cache_reg(EAX); // Inline Cache Register 3245 interpreter_method_oop_reg(EBX); // Method Oop Register when calling interpreter 3246 3247 // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] 3248 cisc_spilling_operand_name(indOffset32); 3249 3250 // Number of stack slots consumed by locking an object 3251 sync_stack_slots(1); 3252 3253 // Compiled code's Frame Pointer 3254 frame_pointer(ESP); 3255 // Interpreter stores its frame pointer in a register which is 3256 // stored to the stack by I2CAdaptors. 3257 // I2CAdaptors convert from interpreted java to compiled java. 3258 interpreter_frame_pointer(EBP); 3259 3260 // Stack alignment requirement 3261 // Alignment size in bytes (128-bit -> 16 bytes) 3262 stack_alignment(StackAlignmentInBytes); 3263 3264 // Number of stack slots between incoming argument block and the start of 3265 // a new frame. The PROLOG must add this many slots to the stack. The 3266 // EPILOG must remove this many slots. Intel needs one slot for 3267 // return address and one for rbp, (must save rbp) 3268 in_preserve_stack_slots(2+VerifyStackAtCalls); 3269 3270 // Number of outgoing stack slots killed above the out_preserve_stack_slots 3271 // for calls to C. 
Supports the var-args backing area for register parms. 3272 varargs_C_out_slots_killed(0); 3273 3274 // The after-PROLOG location of the return address. Location of 3275 // return address specifies a type (REG or STACK) and a number 3276 // representing the register number (i.e. - use a register name) or 3277 // stack slot. 3278 // Ret Addr is on stack in slot 0 if no locks or verification or alignment. 3279 // Otherwise, it is above the locks and verification slot and alignment word 3280 return_addr(STACK - 1 + 3281 round_to((Compile::current()->in_preserve_stack_slots() + 3282 Compile::current()->fixed_slots()), 3283 stack_alignment_in_slots())); 3284 3285 // Body of function which returns an integer array locating 3286 // arguments either in registers or in stack slots. Passed an array 3287 // of ideal registers called "sig" and a "length" count. Stack-slot 3288 // offsets are based on outgoing arguments, i.e. a CALLER setting up 3289 // arguments for a CALLEE. Incoming stack arguments are 3290 // automatically biased by the preserve_stack_slots field above. 3291 calling_convention %{ 3292 // No difference between ingoing/outgoing just pass false 3293 SharedRuntime::java_calling_convention(sig_bt, regs, length, false); 3294 %} 3295 3296 3297 // Body of function which returns an integer array locating 3298 // arguments either in registers or in stack slots. Passed an array 3299 // of ideal registers called "sig" and a "length" count. Stack-slot 3300 // offsets are based on outgoing arguments, i.e. a CALLER setting up 3301 // arguments for a CALLEE. Incoming stack arguments are 3302 // automatically biased by the preserve_stack_slots field above. 
3303 c_calling_convention %{ 3304 // This is obviously always outgoing 3305 (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); 3306 %} 3307 3308 // Location of C & interpreter return values 3309 c_return_value %{ 3310 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3311 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3312 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3313 3314 // in SSE2+ mode we want to keep the FPU stack clean so pretend 3315 // that C functions return float and double results in XMM0. 3316 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3317 return OptoRegPair(XMM0b_num,XMM0_num); 3318 if( ideal_reg == Op_RegF && UseSSE>=2 ) 3319 return OptoRegPair(OptoReg::Bad,XMM0_num); 3320 3321 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3322 %} 3323 3324 // Location of return values 3325 return_value %{ 3326 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3327 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3328 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3329 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3330 return OptoRegPair(XMM0b_num,XMM0_num); 3331 if( ideal_reg == Op_RegF && UseSSE>=1 ) 3332 return OptoRegPair(OptoReg::Bad,XMM0_num); 3333 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3334 %} 3335 3336 %} 3337 3338 //----------ATTRIBUTES--------------------------------------------------------- 3339 //----------Operand Attributes------------------------------------------------- 3340 op_attrib op_cost(0); // Required cost attribute 3341 3342 //----------Instruction Attributes--------------------------------------------- 3343 ins_attrib ins_cost(100); // Required cost attribute 3344 ins_attrib ins_size(8); // Required 
size attribute (in bits) 3345 ins_attrib ins_short_branch(0); // Required flag: is this instruction a 3346 // non-matching short branch variant of some 3347 // long branch? 3348 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) 3349 // specifies the alignment that some part of the instruction (not 3350 // necessarily the start) requires. If > 1, a compute_padding() 3351 // function must be provided for the instruction 3352 3353 //----------OPERANDS----------------------------------------------------------- 3354 // Operand definitions must precede instruction definitions for correct parsing 3355 // in the ADLC because operands constitute user defined types which are used in 3356 // instruction definitions. 3357 3358 //----------Simple Operands---------------------------------------------------- 3359 // Immediate Operands 3360 // Integer Immediate 3361 operand immI() %{ 3362 match(ConI); 3363 3364 op_cost(10); 3365 format %{ %} 3366 interface(CONST_INTER); 3367 %} 3368 3369 // Constant for test vs zero 3370 operand immI0() %{ 3371 predicate(n->get_int() == 0); 3372 match(ConI); 3373 3374 op_cost(0); 3375 format %{ %} 3376 interface(CONST_INTER); 3377 %} 3378 3379 // Constant for increment 3380 operand immI1() %{ 3381 predicate(n->get_int() == 1); 3382 match(ConI); 3383 3384 op_cost(0); 3385 format %{ %} 3386 interface(CONST_INTER); 3387 %} 3388 3389 // Constant for decrement 3390 operand immI_M1() %{ 3391 predicate(n->get_int() == -1); 3392 match(ConI); 3393 3394 op_cost(0); 3395 format %{ %} 3396 interface(CONST_INTER); 3397 %} 3398 3399 // Valid scale values for addressing modes 3400 operand immI2() %{ 3401 predicate(0 <= n->get_int() && (n->get_int() <= 3)); 3402 match(ConI); 3403 3404 format %{ %} 3405 interface(CONST_INTER); 3406 %} 3407 3408 operand immI8() %{ 3409 predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); 3410 match(ConI); 3411 3412 op_cost(5); 3413 format %{ %} 3414 interface(CONST_INTER); 3415 %} 3416 3417 
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate in the range [1, 31] (e.g. shift counts below a word)
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate in the range [32, 63]
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value fits in a signed 32-bit int (sign-extended equality)
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 form)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE form)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0 (bit pattern must be exactly 0)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register drawn from nax_reg (presumably "no EAX" — see the
// register class definition to confirm)
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register drawn from nadx_reg (presumably excludes EAX and EDX)
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register drawn from ncx_reg (presumably excludes ECX)
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long Register (a register pair)
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// predicate(false): never selected by matching directly — presumably only
// substituted in by the matcher for the no-fixup FP compare forms; confirm
// against the instructions that name this operand.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand (pointer constant plus int register)
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// These mirror the plain memory operands above but use eRegP_no_EBP as the
// base register; the high op_cost(100) discourages selection except where the
// EBP-unsafe implicit null test (see eRegP_no_EBP) must be avoided.

// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}
//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed compares; encodings are Jcc/SETcc condition codes)
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o");     // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares.
// Note the encodings are deliberately swapped relative to cmpOp
// (less -> "g", greater -> "l", etc.) so the condition is commuted.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention:  ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
4670 4671 // Integer ALU reg operation 4672 pipe_class ialu_reg(rRegI dst) %{ 4673 single_instruction; 4674 dst : S4(write); 4675 dst : S3(read); 4676 DECODE : S0; // any decoder 4677 ALU : S3; // any alu 4678 %} 4679 4680 // Long ALU reg operation 4681 pipe_class ialu_reg_long(eRegL dst) %{ 4682 instruction_count(2); 4683 dst : S4(write); 4684 dst : S3(read); 4685 DECODE : S0(2); // any 2 decoders 4686 ALU : S3(2); // both alus 4687 %} 4688 4689 // Integer ALU reg operation using big decoder 4690 pipe_class ialu_reg_fat(rRegI dst) %{ 4691 single_instruction; 4692 dst : S4(write); 4693 dst : S3(read); 4694 D0 : S0; // big decoder only 4695 ALU : S3; // any alu 4696 %} 4697 4698 // Long ALU reg operation using big decoder 4699 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4700 instruction_count(2); 4701 dst : S4(write); 4702 dst : S3(read); 4703 D0 : S0(2); // big decoder only; twice 4704 ALU : S3(2); // any 2 alus 4705 %} 4706 4707 // Integer ALU reg-reg operation 4708 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4709 single_instruction; 4710 dst : S4(write); 4711 src : S3(read); 4712 DECODE : S0; // any decoder 4713 ALU : S3; // any alu 4714 %} 4715 4716 // Long ALU reg-reg operation 4717 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4718 instruction_count(2); 4719 dst : S4(write); 4720 src : S3(read); 4721 DECODE : S0(2); // any 2 decoders 4722 ALU : S3(2); // both alus 4723 %} 4724 4725 // Integer ALU reg-reg operation 4726 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4727 single_instruction; 4728 dst : S4(write); 4729 src : S3(read); 4730 D0 : S0; // big decoder only 4731 ALU : S3; // any alu 4732 %} 4733 4734 // Long ALU reg-reg operation 4735 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4736 instruction_count(2); 4737 dst : S4(write); 4738 src : S3(read); 4739 D0 : S0(2); // big decoder only; twice 4740 ALU : S3(2); // both alus 4741 %} 4742 4743 // Integer ALU reg-mem operation 4744 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4745 single_instruction; 4746 dst : S5(write); 4747 mem : S3(read); 4748 D0 : S0; // big decoder only 4749 ALU : S4; // any alu 4750 MEM : S3; // any mem 4751 %} 4752 4753 // Long ALU reg-mem operation 4754 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4755 instruction_count(2); 4756 dst : S5(write); 4757 mem : S3(read); 4758 D0 : S0(2); // big decoder only; twice 4759 ALU : S4(2); // any 2 alus 4760 MEM : S3(2); // both mems 4761 %} 4762 4763 // Integer mem operation (prefetch) 4764 pipe_class ialu_mem(memory mem) 4765 %{ 4766 single_instruction; 4767 mem : S3(read); 4768 D0 : S0; // big decoder only 4769 MEM : S3; // any mem 4770 %} 4771 4772 // Integer Store to Memory 4773 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4774 single_instruction; 4775 mem : S3(read); 4776 src : S5(read); 4777 D0 : S0; // big decoder only 4778 ALU : S4; // any alu 4779 MEM : S3; 4780 %} 4781 4782 // Long Store to Memory 4783 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4784 instruction_count(2); 4785 mem : S3(read); 4786 src : S5(read); 4787 D0 : S0(2); // big decoder only; twice 4788 ALU : S4(2); // any 2 alus 4789 MEM : S3(2); // Both mems 4790 %} 4791 4792 // Integer Store to Memory 4793 pipe_class ialu_mem_imm(memory mem) %{ 4794 single_instruction; 4795 mem : S3(read); 4796 D0 : S0; // big decoder only 4797 ALU : S4; // any alu 4798 MEM : S3; 4799 %} 4800 4801 // Integer ALU0 reg-reg operation 4802 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4803 single_instruction; 4804 dst : S4(write); 4805 src : S3(read); 4806 D0 : S0; // Big decoder only 4807 ALU0 : S3; // only alu0 4808 %} 4809 4810 // Integer ALU0 reg-mem operation 4811 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4812 single_instruction; 4813 dst : S5(write); 4814 mem : S3(read); 4815 D0 : S0; // big decoder only 4816 ALU0 : S4; // ALU0 only 4817 MEM : S3; // any mem 4818 %} 4819 4820 // Integer ALU reg-reg operation 4821 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4822 single_instruction; 4823 cr : S4(write); 4824 src1 : S3(read); 4825 src2 : S3(read); 4826 DECODE : S0; // any decoder 4827 ALU : S3; // any alu 4828 %} 4829 4830 // Integer ALU reg-imm operation 4831 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4832 single_instruction; 4833 cr : S4(write); 4834 src1 : S3(read); 4835 DECODE : S0; // any decoder 4836 ALU : S3; // any alu 4837 %} 4838 4839 // Integer ALU reg-mem operation 4840 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4841 single_instruction; 4842 cr : S4(write); 4843 src1 : S3(read); 4844 src2 : S3(read); 4845 D0 : S0; // big decoder only 4846 ALU : S4; // any alu 4847 MEM : S3; 4848 %} 4849 4850 // Conditional move reg-reg 4851 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4852 instruction_count(4); 4853 y : S4(read); 4854 q : S3(read); 4855 p : S3(read); 4856 DECODE : S0(4); // any decoder 4857 %} 4858 4859 // Conditional move reg-reg 4860 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4861 single_instruction; 4862 dst : S4(write); 4863 src : S3(read); 4864 cr : S3(read); 4865 DECODE : S0; // any decoder 4866 %} 4867 4868 // Conditional move reg-mem 4869 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4870 single_instruction; 4871 dst : S4(write); 4872 src : S3(read); 4873 cr : S3(read); 4874 DECODE : S0; // any decoder 4875 MEM : S3; 4876 %} 4877 4878 // Conditional move reg-reg long 4879 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4880 single_instruction; 4881 dst : S4(write); 4882 src : S3(read); 4883 cr : S3(read); 4884 DECODE : S0(2); // any 2 decoders 4885 %} 4886 4887 // Conditional move double reg-reg 4888 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4889 single_instruction; 4890 dst : S4(write); 4891 src : S3(read); 4892 cr : S3(read); 4893 DECODE : S0; // any decoder 4894 %} 4895 4896 // Float reg-reg operation 4897 pipe_class fpu_reg(regDPR 
dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);  // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);  // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);  // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);  // any 4 decoders (comment previously said "any 3" -- stale
                     // copy from the 3-operand class above; the resource use is S0(4))
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);  // any 3 decoders
    D0     : S0;     // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;     // big decoder only
    DECODE : S1;     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;     // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;     // big decoder only
    DECODE : S1(2);  // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;     // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2); 4973 src : S5(read); 4974 mem : S3(read); 4975 DECODE : S0; // any decoder for FPU PUSH 4976 D0 : S1; // big decoder only 4977 FPU : S4; 4978 MEM : S3; // any mem 4979 %} 4980 4981 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4982 instruction_count(3); 4983 src1 : S3(read); 4984 src2 : S3(read); 4985 mem : S3(read); 4986 DECODE : S0(2); // any decoder for FPU PUSH 4987 D0 : S1; // big decoder only 4988 FPU : S4; 4989 MEM : S3; // any mem 4990 %} 4991 4992 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4993 instruction_count(3); 4994 src1 : S3(read); 4995 src2 : S3(read); 4996 mem : S4(read); 4997 DECODE : S0; // any decoder for FPU PUSH 4998 D0 : S0(2); // big decoder only 4999 FPU : S4; 5000 MEM : S3(2); // any mem 5001 %} 5002 5003 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 5004 instruction_count(2); 5005 src1 : S3(read); 5006 dst : S4(read); 5007 D0 : S0(2); // big decoder only 5008 MEM : S3(2); // any mem 5009 %} 5010 5011 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 5012 instruction_count(3); 5013 src1 : S3(read); 5014 src2 : S3(read); 5015 dst : S4(read); 5016 D0 : S0(3); // big decoder only 5017 FPU : S4; 5018 MEM : S3(3); // any mem 5019 %} 5020 5021 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 5022 instruction_count(3); 5023 src1 : S4(read); 5024 mem : S4(read); 5025 DECODE : S0; // any decoder for FPU PUSH 5026 D0 : S0(2); // big decoder only 5027 FPU : S4; 5028 MEM : S3(2); // any mem 5029 %} 5030 5031 // Float load constant 5032 pipe_class fpu_reg_con(regDPR dst) %{ 5033 instruction_count(2); 5034 dst : S5(write); 5035 D0 : S0; // big decoder only for the load 5036 DECODE : S1; // any decoder for FPU POP 5037 FPU : S4; 5038 MEM : S3; // any mem 5039 %} 5040 5041 // Float load constant 5042 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5043 instruction_count(3); 5044 dst : S5(write); 5045 src : S3(read); 5046 D0 : S0; // big decoder only for 
the load 5047 DECODE : S1(2); // any decoder for FPU POP 5048 FPU : S4; 5049 MEM : S3; // any mem 5050 %} 5051 5052 // UnConditional branch 5053 pipe_class pipe_jmp( label labl ) %{ 5054 single_instruction; 5055 BR : S3; 5056 %} 5057 5058 // Conditional branch 5059 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5060 single_instruction; 5061 cr : S1(read); 5062 BR : S3; 5063 %} 5064 5065 // Allocation idiom 5066 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5067 instruction_count(1); force_serialization; 5068 fixed_latency(6); 5069 heap_ptr : S3(read); 5070 DECODE : S0(3); 5071 D0 : S2; 5072 MEM : S3; 5073 ALU : S3(2); 5074 dst : S5(write); 5075 BR : S5; 5076 %} 5077 5078 // Generic big/slow expanded idiom 5079 pipe_class pipe_slow( ) %{ 5080 instruction_count(10); multiple_bundles; force_serialization; 5081 fixed_latency(100); 5082 D0 : S0(2); 5083 MEM : S3(2); 5084 %} 5085 5086 // The real do-nothing guy 5087 pipe_class empty( ) %{ 5088 instruction_count(0); 5089 %} 5090 5091 // Define the class for the Nop node 5092 define %{ 5093 MachNop = empty; 5094 %} 5095 5096 %} 5097 5098 //----------INSTRUCTIONS------------------------------------------------------- 5099 // 5100 // match -- States which machine-independent subtree may be replaced 5101 // by this instruction. 5102 // ins_cost -- The estimated cost of this instruction is used by instruction 5103 // selection to identify a minimum cost tree of machine 5104 // instructions that matches a tree of machine-independent 5105 // instructions. 5106 // format -- A string providing the disassembly for this instruction. 5107 // The value of an instruction's operand may be inserted 5108 // by referring to it with a '$' prefix. 5109 // opcode -- Three instruction opcodes may be provided. These are referred 5110 // to within an encode class as $primary, $secondary, and $tertiary 5111 // respectively. 
The primary opcode is commonly used to 5112 // indicate the type of machine instruction, while secondary 5113 // and tertiary are often used for prefix options or addressing 5114 // modes. 5115 // ins_encode -- A list of encode classes with parameters. The encode class 5116 // name must have been defined in an 'enc_class' specification 5117 // in the encode section of the architecture description. 5118 5119 //----------BSWAP-Instruction-------------------------------------------------- 5120 instruct bytes_reverse_int(rRegI dst) %{ 5121 match(Set dst (ReverseBytesI dst)); 5122 5123 format %{ "BSWAP $dst" %} 5124 opcode(0x0F, 0xC8); 5125 ins_encode( OpcP, OpcSReg(dst) ); 5126 ins_pipe( ialu_reg ); 5127 %} 5128 5129 instruct bytes_reverse_long(eRegL dst) %{ 5130 match(Set dst (ReverseBytesL dst)); 5131 5132 format %{ "BSWAP $dst.lo\n\t" 5133 "BSWAP $dst.hi\n\t" 5134 "XCHG $dst.lo $dst.hi" %} 5135 5136 ins_cost(125); 5137 ins_encode( bswap_long_bytes(dst) ); 5138 ins_pipe( ialu_reg_reg); 5139 %} 5140 5141 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5142 match(Set dst (ReverseBytesUS dst)); 5143 effect(KILL cr); 5144 5145 format %{ "BSWAP $dst\n\t" 5146 "SHR $dst,16\n\t" %} 5147 ins_encode %{ 5148 __ bswapl($dst$$Register); 5149 __ shrl($dst$$Register, 16); 5150 %} 5151 ins_pipe( ialu_reg ); 5152 %} 5153 5154 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5155 match(Set dst (ReverseBytesS dst)); 5156 effect(KILL cr); 5157 5158 format %{ "BSWAP $dst\n\t" 5159 "SAR $dst,16\n\t" %} 5160 ins_encode %{ 5161 __ bswapl($dst$$Register); 5162 __ sarl($dst$$Register, 16); 5163 %} 5164 ins_pipe( ialu_reg ); 5165 %} 5166 5167 5168 //---------- Zeros Count Instructions ------------------------------------------ 5169 5170 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5171 predicate(UseCountLeadingZerosInstruction); 5172 match(Set dst (CountLeadingZerosI src)); 5173 effect(KILL cr); 5174 5175 format %{ "LZCNT $dst, $src\t# count 
leading zeros (int)" %} 5176 ins_encode %{ 5177 __ lzcntl($dst$$Register, $src$$Register); 5178 %} 5179 ins_pipe(ialu_reg); 5180 %} 5181 5182 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5183 predicate(!UseCountLeadingZerosInstruction); 5184 match(Set dst (CountLeadingZerosI src)); 5185 effect(KILL cr); 5186 5187 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5188 "JNZ skip\n\t" 5189 "MOV $dst, -1\n" 5190 "skip:\n\t" 5191 "NEG $dst\n\t" 5192 "ADD $dst, 31" %} 5193 ins_encode %{ 5194 Register Rdst = $dst$$Register; 5195 Register Rsrc = $src$$Register; 5196 Label skip; 5197 __ bsrl(Rdst, Rsrc); 5198 __ jccb(Assembler::notZero, skip); 5199 __ movl(Rdst, -1); 5200 __ bind(skip); 5201 __ negl(Rdst); 5202 __ addl(Rdst, BitsPerInt - 1); 5203 %} 5204 ins_pipe(ialu_reg); 5205 %} 5206 5207 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5208 predicate(UseCountLeadingZerosInstruction); 5209 match(Set dst (CountLeadingZerosL src)); 5210 effect(TEMP dst, KILL cr); 5211 5212 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5213 "JNC done\n\t" 5214 "LZCNT $dst, $src.lo\n\t" 5215 "ADD $dst, 32\n" 5216 "done:" %} 5217 ins_encode %{ 5218 Register Rdst = $dst$$Register; 5219 Register Rsrc = $src$$Register; 5220 Label done; 5221 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5222 __ jccb(Assembler::carryClear, done); 5223 __ lzcntl(Rdst, Rsrc); 5224 __ addl(Rdst, BitsPerInt); 5225 __ bind(done); 5226 %} 5227 ins_pipe(ialu_reg); 5228 %} 5229 5230 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5231 predicate(!UseCountLeadingZerosInstruction); 5232 match(Set dst (CountLeadingZerosL src)); 5233 effect(TEMP dst, KILL cr); 5234 5235 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5236 "JZ msw_is_zero\n\t" 5237 "ADD $dst, 32\n\t" 5238 "JMP not_zero\n" 5239 "msw_is_zero:\n\t" 5240 "BSR $dst, $src.lo\n\t" 5241 "JNZ not_zero\n\t" 5242 "MOV $dst, -1\n" 5243 "not_zero:\n\t" 5244 "NEG 
$dst\n\t" 5245 "ADD $dst, 63\n" %} 5246 ins_encode %{ 5247 Register Rdst = $dst$$Register; 5248 Register Rsrc = $src$$Register; 5249 Label msw_is_zero; 5250 Label not_zero; 5251 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5252 __ jccb(Assembler::zero, msw_is_zero); 5253 __ addl(Rdst, BitsPerInt); 5254 __ jmpb(not_zero); 5255 __ bind(msw_is_zero); 5256 __ bsrl(Rdst, Rsrc); 5257 __ jccb(Assembler::notZero, not_zero); 5258 __ movl(Rdst, -1); 5259 __ bind(not_zero); 5260 __ negl(Rdst); 5261 __ addl(Rdst, BitsPerLong - 1); 5262 %} 5263 ins_pipe(ialu_reg); 5264 %} 5265 5266 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5267 predicate(UseCountTrailingZerosInstruction); 5268 match(Set dst (CountTrailingZerosI src)); 5269 effect(KILL cr); 5270 5271 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5272 ins_encode %{ 5273 __ tzcntl($dst$$Register, $src$$Register); 5274 %} 5275 ins_pipe(ialu_reg); 5276 %} 5277 5278 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5279 predicate(!UseCountTrailingZerosInstruction); 5280 match(Set dst (CountTrailingZerosI src)); 5281 effect(KILL cr); 5282 5283 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5284 "JNZ done\n\t" 5285 "MOV $dst, 32\n" 5286 "done:" %} 5287 ins_encode %{ 5288 Register Rdst = $dst$$Register; 5289 Label done; 5290 __ bsfl(Rdst, $src$$Register); 5291 __ jccb(Assembler::notZero, done); 5292 __ movl(Rdst, BitsPerInt); 5293 __ bind(done); 5294 %} 5295 ins_pipe(ialu_reg); 5296 %} 5297 5298 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5299 predicate(UseCountTrailingZerosInstruction); 5300 match(Set dst (CountTrailingZerosL src)); 5301 effect(TEMP dst, KILL cr); 5302 5303 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5304 "JNC done\n\t" 5305 "TZCNT $dst, $src.hi\n\t" 5306 "ADD $dst, 32\n" 5307 "done:" %} 5308 ins_encode %{ 5309 Register Rdst = $dst$$Register; 5310 Register Rsrc = $src$$Register; 5311 Label done; 5312 __ 
tzcntl(Rdst, Rsrc); 5313 __ jccb(Assembler::carryClear, done); 5314 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5315 __ addl(Rdst, BitsPerInt); 5316 __ bind(done); 5317 %} 5318 ins_pipe(ialu_reg); 5319 %} 5320 5321 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5322 predicate(!UseCountTrailingZerosInstruction); 5323 match(Set dst (CountTrailingZerosL src)); 5324 effect(TEMP dst, KILL cr); 5325 5326 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5327 "JNZ done\n\t" 5328 "BSF $dst, $src.hi\n\t" 5329 "JNZ msw_not_zero\n\t" 5330 "MOV $dst, 32\n" 5331 "msw_not_zero:\n\t" 5332 "ADD $dst, 32\n" 5333 "done:" %} 5334 ins_encode %{ 5335 Register Rdst = $dst$$Register; 5336 Register Rsrc = $src$$Register; 5337 Label msw_not_zero; 5338 Label done; 5339 __ bsfl(Rdst, Rsrc); 5340 __ jccb(Assembler::notZero, done); 5341 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5342 __ jccb(Assembler::notZero, msw_not_zero); 5343 __ movl(Rdst, BitsPerInt); 5344 __ bind(msw_not_zero); 5345 __ addl(Rdst, BitsPerInt); 5346 __ bind(done); 5347 %} 5348 ins_pipe(ialu_reg); 5349 %} 5350 5351 5352 //---------- Population Count Instructions ------------------------------------- 5353 5354 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5355 predicate(UsePopCountInstruction); 5356 match(Set dst (PopCountI src)); 5357 effect(KILL cr); 5358 5359 format %{ "POPCNT $dst, $src" %} 5360 ins_encode %{ 5361 __ popcntl($dst$$Register, $src$$Register); 5362 %} 5363 ins_pipe(ialu_reg); 5364 %} 5365 5366 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5367 predicate(UsePopCountInstruction); 5368 match(Set dst (PopCountI (LoadI mem))); 5369 effect(KILL cr); 5370 5371 format %{ "POPCNT $dst, $mem" %} 5372 ins_encode %{ 5373 __ popcntl($dst$$Register, $mem$$Address); 5374 %} 5375 ins_pipe(ialu_reg); 5376 %} 5377 5378 // Note: Long.bitCount(long) returns an int. 
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  // TEMP dst: dst must not alias either half of src, since the first POPCNT
  // would otherwise clobber src.hi before the second POPCNT reads it.
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    // popcount(64-bit value) = popcount(low word) + popcount(high word).
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Build raw addresses for the two 32-bit halves of the in-memory long;
    // the high word lives at displacement +4.
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL
cr); 5435 5436 ins_cost(375); 5437 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5438 "MOV $dst.hi,$dst.lo\n\t" 5439 "SAR $dst.hi,7" %} 5440 5441 ins_encode %{ 5442 __ movsbl($dst$$Register, $mem$$Address); 5443 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5444 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5445 %} 5446 5447 ins_pipe(ialu_reg_mem); 5448 %} 5449 5450 // Load Unsigned Byte (8bit UNsigned) 5451 instruct loadUB(xRegI dst, memory mem) %{ 5452 match(Set dst (LoadUB mem)); 5453 5454 ins_cost(125); 5455 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5456 5457 ins_encode %{ 5458 __ movzbl($dst$$Register, $mem$$Address); 5459 %} 5460 5461 ins_pipe(ialu_reg_mem); 5462 %} 5463 5464 // Load Unsigned Byte (8 bit UNsigned) into Long Register 5465 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5466 match(Set dst (ConvI2L (LoadUB mem))); 5467 effect(KILL cr); 5468 5469 ins_cost(250); 5470 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5471 "XOR $dst.hi,$dst.hi" %} 5472 5473 ins_encode %{ 5474 Register Rdst = $dst$$Register; 5475 __ movzbl(Rdst, $mem$$Address); 5476 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5477 %} 5478 5479 ins_pipe(ialu_reg_mem); 5480 %} 5481 5482 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5483 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5484 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5485 effect(KILL cr); 5486 5487 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5488 "XOR $dst.hi,$dst.hi\n\t" 5489 "AND $dst.lo,right_n_bits($mask, 8)" %} 5490 ins_encode %{ 5491 Register Rdst = $dst$$Register; 5492 __ movzbl(Rdst, $mem$$Address); 5493 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5494 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5495 %} 5496 ins_pipe(ialu_reg_mem); 5497 %} 5498 5499 // Load Short (16bit signed) 5500 instruct loadS(rRegI 
dst, memory mem) %{ 5501 match(Set dst (LoadS mem)); 5502 5503 ins_cost(125); 5504 format %{ "MOVSX $dst,$mem\t# short" %} 5505 5506 ins_encode %{ 5507 __ movswl($dst$$Register, $mem$$Address); 5508 %} 5509 5510 ins_pipe(ialu_reg_mem); 5511 %} 5512 5513 // Load Short (16 bit signed) to Byte (8 bit signed) 5514 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5515 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5516 5517 ins_cost(125); 5518 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5519 ins_encode %{ 5520 __ movsbl($dst$$Register, $mem$$Address); 5521 %} 5522 ins_pipe(ialu_reg_mem); 5523 %} 5524 5525 // Load Short (16bit signed) into Long Register 5526 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5527 match(Set dst (ConvI2L (LoadS mem))); 5528 effect(KILL cr); 5529 5530 ins_cost(375); 5531 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5532 "MOV $dst.hi,$dst.lo\n\t" 5533 "SAR $dst.hi,15" %} 5534 5535 ins_encode %{ 5536 __ movswl($dst$$Register, $mem$$Address); 5537 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5538 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5539 %} 5540 5541 ins_pipe(ialu_reg_mem); 5542 %} 5543 5544 // Load Unsigned Short/Char (16bit unsigned) 5545 instruct loadUS(rRegI dst, memory mem) %{ 5546 match(Set dst (LoadUS mem)); 5547 5548 ins_cost(125); 5549 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5550 5551 ins_encode %{ 5552 __ movzwl($dst$$Register, $mem$$Address); 5553 %} 5554 5555 ins_pipe(ialu_reg_mem); 5556 %} 5557 5558 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5559 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5560 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5561 5562 ins_cost(125); 5563 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5564 ins_encode %{ 5565 __ movsbl($dst$$Register, $mem$$Address); 5566 %} 5567 ins_pipe(ialu_reg_mem); 5568 %} 5569 5570 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5571 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5572 match(Set dst (ConvI2L (LoadUS mem))); 5573 effect(KILL cr); 5574 5575 ins_cost(250); 5576 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5577 "XOR $dst.hi,$dst.hi" %} 5578 5579 ins_encode %{ 5580 __ movzwl($dst$$Register, $mem$$Address); 5581 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5582 %} 5583 5584 ins_pipe(ialu_reg_mem); 5585 %} 5586 5587 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5588 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5589 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5590 effect(KILL cr); 5591 5592 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5593 "XOR $dst.hi,$dst.hi" %} 5594 ins_encode %{ 5595 Register Rdst = $dst$$Register; 5596 __ movzbl(Rdst, $mem$$Address); 5597 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5598 %} 5599 ins_pipe(ialu_reg_mem); 5600 %} 5601 5602 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5603 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5604 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5605 effect(KILL cr); 5606 5607 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5608 "XOR $dst.hi,$dst.hi\n\t" 5609 "AND $dst.lo,right_n_bits($mask, 16)" %} 5610 ins_encode %{ 5611 Register Rdst = $dst$$Register; 5612 __ movzwl(Rdst, $mem$$Address); 5613 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5614 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5615 %} 5616 ins_pipe(ialu_reg_mem); 5617 %} 5618 5619 // Load Integer 5620 instruct loadI(rRegI dst, memory mem) %{ 5621 match(Set dst (LoadI mem)); 5622 5623 ins_cost(125); 5624 format %{ "MOV $dst,$mem\t# int" %} 5625 5626 ins_encode %{ 5627 __ movl($dst$$Register, $mem$$Address); 5628 %} 5629 5630 ins_pipe(ialu_reg_mem); 5631 %} 5632 5633 // Load Integer (32 bit signed) to Byte (8 bit signed) 5634 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5635 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5636 5637 ins_cost(125); 5638 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5639 ins_encode %{ 5640 __ movsbl($dst$$Register, $mem$$Address); 5641 %} 5642 ins_pipe(ialu_reg_mem); 5643 %} 5644 5645 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5646 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5647 match(Set dst (AndI (LoadI mem) mask)); 5648 5649 ins_cost(125); 5650 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5651 ins_encode %{ 5652 __ movzbl($dst$$Register, $mem$$Address); 5653 %} 5654 ins_pipe(ialu_reg_mem); 5655 %} 5656 5657 // Load Integer (32 bit signed) to Short (16 bit signed) 5658 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5659 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5660 5661 ins_cost(125); 5662 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5663 ins_encode %{ 5664 __ movswl($dst$$Register, $mem$$Address); 5665 %} 5666 ins_pipe(ialu_reg_mem); 5667 
%} 5668 5669 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5670 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5671 match(Set dst (AndI (LoadI mem) mask)); 5672 5673 ins_cost(125); 5674 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5675 ins_encode %{ 5676 __ movzwl($dst$$Register, $mem$$Address); 5677 %} 5678 ins_pipe(ialu_reg_mem); 5679 %} 5680 5681 // Load Integer into Long Register 5682 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5683 match(Set dst (ConvI2L (LoadI mem))); 5684 effect(KILL cr); 5685 5686 ins_cost(375); 5687 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5688 "MOV $dst.hi,$dst.lo\n\t" 5689 "SAR $dst.hi,31" %} 5690 5691 ins_encode %{ 5692 __ movl($dst$$Register, $mem$$Address); 5693 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5694 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5695 %} 5696 5697 ins_pipe(ialu_reg_mem); 5698 %} 5699 5700 // Load Integer with mask 0xFF into Long Register 5701 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5702 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5703 effect(KILL cr); 5704 5705 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5706 "XOR $dst.hi,$dst.hi" %} 5707 ins_encode %{ 5708 Register Rdst = $dst$$Register; 5709 __ movzbl(Rdst, $mem$$Address); 5710 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5711 %} 5712 ins_pipe(ialu_reg_mem); 5713 %} 5714 5715 // Load Integer with mask 0xFFFF into Long Register 5716 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5717 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5718 effect(KILL cr); 5719 5720 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5721 "XOR $dst.hi,$dst.hi" %} 5722 ins_encode %{ 5723 Register Rdst = $dst$$Register; 5724 __ movzwl(Rdst, $mem$$Address); 5725 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5726 %} 5727 ins_pipe(ialu_reg_mem); 
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    // A 31-bit (non-negative) mask guarantees the masked int is non-negative,
    // so sign extension to long is just a zeroed high word.
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    // AndL with the 0xFFFFFFFF mask discards the sign bits, so zero the
    // high word instead of sign-extending.
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two separate 32-bit loads (NOT atomic; the atomic case is handled by
    // the *_volatile variants below via predicate).  The load_long_memory
    // operand keeps the address register from overlapping the destination
    // pair, so the second load still sees a valid address.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
// Volatile (atomic) long load when only x87 is available (UseSSE<=1):
// route the 64-bit value through the FPU so the memory access is a single
// atomic 64-bit instruction, then spill it to a stack slot.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Volatile long load via an XMM temp (UseSSE>=2): a single 64-bit MOVSD
// from memory is atomic; result lands in a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Volatile long load directly into a register pair: load atomically into an
// XMM temp, then split out the low and high 32-bit halves.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    // Shift the high half down so a second MOVD can extract it.
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range (array length)
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 path, UseSSE<=1)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): format says MOVLPD but the encoding calls movdbl();
// presumably the macro assembler emits MOVLPD when UseXmmLoadAndClearUpper
// is off — confirm against MacroAssembler::movdbl.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 path, UseSSE==0)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address — one LEA variant per addressing-mode operand.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero — XOR is shorter than MOV imm32 but clobbers flags.
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);               /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);               /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant as two 32-bit immediate moves.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Long zero: XOR both halves (clobbers flags).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// Load float constant from the constant table into an XMM register (SSE).
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// Zeroing idiom: XORPS is cheaper than a constant-table load.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
// x87 double constant: load from constant table, pop into the target stack reg.
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
// x87 has a dedicated "push +0.0" instruction (FLDZ).
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
// x87 has a dedicated "push +1.0" instruction (FLD1).
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// Load double constant from the constant table into an XMM register (SSE2).
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// Zeroing idiom: XORPD is cheaper than a constant-table load.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load long stack slot as two 32-bit moves (lo then hi half).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// Variant chosen by AllocatePrefetchInstr and SSE availability.

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short — 0x66 operand-size prefix turns the 32-bit MOV into 16-bit.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long — two 32-bit moves; only legal when atomicity is not required.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer — only the low half of the long is stored.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: atomic 64-bit store via MOVSD; the leading CMP probes the
// address for implicit null checks (and is why flags are killed).
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant from a register pair: pack lo/hi halves into one XMM value
// with PUNPCKLDQ, then store the 64 bits atomically.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);               /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
// Store Pointer Immediate (see card-mark note above this instruct in the file).
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87 path; source must already be at FPU top-of-stack, regDPR1)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float (x87 path; source must be at FPU top-of-stack, regFPR1)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86 — the FST_S itself narrows the double
// to float, so ConvD2F folds into the store.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
6544 instruct storeFPR_imm( memory mem, immFPR src) %{ 6545 match(Set mem (StoreF mem src)); 6546 6547 ins_cost(50); 6548 format %{ "MOV $mem,$src\t# store float" %} 6549 opcode(0xC7); /* C7 /0 */ 6550 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src )); 6551 ins_pipe( ialu_mem_imm ); 6552 %} 6553 6554 // Store immediate Float value (it is faster than store from XMM register) 6555 // The instruction usage is guarded by predicate in operand immF(). 6556 instruct storeF_imm( memory mem, immF src) %{ 6557 match(Set mem (StoreF mem src)); 6558 6559 ins_cost(50); 6560 format %{ "MOV $mem,$src\t# store float" %} 6561 opcode(0xC7); /* C7 /0 */ 6562 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src )); 6563 ins_pipe( ialu_mem_imm ); 6564 %} 6565 6566 // Store Integer to stack slot 6567 instruct storeSSI(stackSlotI dst, rRegI src) %{ 6568 match(Set dst src); 6569 6570 ins_cost(100); 6571 format %{ "MOV $dst,$src" %} 6572 opcode(0x89); 6573 ins_encode( OpcPRegSS( dst, src ) ); 6574 ins_pipe( ialu_mem_reg ); 6575 %} 6576 6577 // Store Integer to stack slot 6578 instruct storeSSP(stackSlotP dst, eRegP src) %{ 6579 match(Set dst src); 6580 6581 ins_cost(100); 6582 format %{ "MOV $dst,$src" %} 6583 opcode(0x89); 6584 ins_encode( OpcPRegSS( dst, src ) ); 6585 ins_pipe( ialu_mem_reg ); 6586 %} 6587 6588 // Store Long to stack slot 6589 instruct storeSSL(stackSlotL dst, eRegL src) %{ 6590 match(Set dst src); 6591 6592 ins_cost(200); 6593 format %{ "MOV $dst,$src.lo\n\t" 6594 "MOV $dst+4,$src.hi" %} 6595 opcode(0x89, 0x89); 6596 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 6597 ins_pipe( ialu_mem_long_reg ); 6598 %} 6599 6600 //----------MemBar Instructions----------------------------------------------- 6601 // Memory barrier flavors 6602 6603 instruct membar_acquire() %{ 6604 match(MemBarAcquire); 6605 match(LoadFence); 6606 ins_cost(400); 6607 6608 size(0); 6609 format %{ "MEMBAR-acquire ! 
(empty encoding)" %} 6610 ins_encode(); 6611 ins_pipe(empty); 6612 %} 6613 6614 instruct membar_acquire_lock() %{ 6615 match(MemBarAcquireLock); 6616 ins_cost(0); 6617 6618 size(0); 6619 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %} 6620 ins_encode( ); 6621 ins_pipe(empty); 6622 %} 6623 6624 instruct membar_release() %{ 6625 match(MemBarRelease); 6626 match(StoreFence); 6627 ins_cost(400); 6628 6629 size(0); 6630 format %{ "MEMBAR-release ! (empty encoding)" %} 6631 ins_encode( ); 6632 ins_pipe(empty); 6633 %} 6634 6635 instruct membar_release_lock() %{ 6636 match(MemBarReleaseLock); 6637 ins_cost(0); 6638 6639 size(0); 6640 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %} 6641 ins_encode( ); 6642 ins_pipe(empty); 6643 %} 6644 6645 instruct membar_volatile(eFlagsReg cr) %{ 6646 match(MemBarVolatile); 6647 effect(KILL cr); 6648 ins_cost(400); 6649 6650 format %{ 6651 $$template 6652 if (os::is_MP()) { 6653 $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile" 6654 } else { 6655 $$emit$$"MEMBAR-volatile ! 
(empty encoding)" 6656 } 6657 %} 6658 ins_encode %{ 6659 __ membar(Assembler::StoreLoad); 6660 %} 6661 ins_pipe(pipe_slow); 6662 %} 6663 6664 instruct unnecessary_membar_volatile() %{ 6665 match(MemBarVolatile); 6666 predicate(Matcher::post_store_load_barrier(n)); 6667 ins_cost(0); 6668 6669 size(0); 6670 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} 6671 ins_encode( ); 6672 ins_pipe(empty); 6673 %} 6674 6675 instruct membar_storestore() %{ 6676 match(MemBarStoreStore); 6677 ins_cost(0); 6678 6679 size(0); 6680 format %{ "MEMBAR-storestore (empty encoding)" %} 6681 ins_encode( ); 6682 ins_pipe(empty); 6683 %} 6684 6685 //----------Move Instructions-------------------------------------------------- 6686 instruct castX2P(eAXRegP dst, eAXRegI src) %{ 6687 match(Set dst (CastX2P src)); 6688 format %{ "# X2P $dst, $src" %} 6689 ins_encode( /*empty encoding*/ ); 6690 ins_cost(0); 6691 ins_pipe(empty); 6692 %} 6693 6694 instruct castP2X(rRegI dst, eRegP src ) %{ 6695 match(Set dst (CastP2X src)); 6696 ins_cost(50); 6697 format %{ "MOV $dst, $src\t# CastP2X" %} 6698 ins_encode( enc_Copy( dst, src) ); 6699 ins_pipe( ialu_reg_reg ); 6700 %} 6701 6702 //----------Conditional Move--------------------------------------------------- 6703 // Conditional move 6704 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ 6705 predicate(!VM_Version::supports_cmov() ); 6706 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6707 ins_cost(200); 6708 format %{ "J$cop,us skip\t# signed cmove\n\t" 6709 "MOV $dst,$src\n" 6710 "skip:" %} 6711 ins_encode %{ 6712 Label Lskip; 6713 // Invert sense of branch from sense of CMOV 6714 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6715 __ movl($dst$$Register, $src$$Register); 6716 __ bind(Lskip); 6717 %} 6718 ins_pipe( pipe_cmov_reg ); 6719 %} 6720 6721 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ 6722 predicate(!VM_Version::supports_cmov() ); 6723 match(Set dst (CMoveI 
(Binary cop cr) (Binary dst src))); 6724 ins_cost(200); 6725 format %{ "J$cop,us skip\t# unsigned cmove\n\t" 6726 "MOV $dst,$src\n" 6727 "skip:" %} 6728 ins_encode %{ 6729 Label Lskip; 6730 // Invert sense of branch from sense of CMOV 6731 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6732 __ movl($dst$$Register, $src$$Register); 6733 __ bind(Lskip); 6734 %} 6735 ins_pipe( pipe_cmov_reg ); 6736 %} 6737 6738 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ 6739 predicate(VM_Version::supports_cmov() ); 6740 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6741 ins_cost(200); 6742 format %{ "CMOV$cop $dst,$src" %} 6743 opcode(0x0F,0x40); 6744 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6745 ins_pipe( pipe_cmov_reg ); 6746 %} 6747 6748 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ 6749 predicate(VM_Version::supports_cmov() ); 6750 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6751 ins_cost(200); 6752 format %{ "CMOV$cop $dst,$src" %} 6753 opcode(0x0F,0x40); 6754 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6755 ins_pipe( pipe_cmov_reg ); 6756 %} 6757 6758 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ 6759 predicate(VM_Version::supports_cmov() ); 6760 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6761 ins_cost(200); 6762 expand %{ 6763 cmovI_regU(cop, cr, dst, src); 6764 %} 6765 %} 6766 6767 // Conditional move 6768 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{ 6769 predicate(VM_Version::supports_cmov() ); 6770 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6771 ins_cost(250); 6772 format %{ "CMOV$cop $dst,$src" %} 6773 opcode(0x0F,0x40); 6774 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6775 ins_pipe( pipe_cmov_mem ); 6776 %} 6777 6778 // Conditional move 6779 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ 6780 predicate(VM_Version::supports_cmov() ); 6781 
match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6782 ins_cost(250); 6783 format %{ "CMOV$cop $dst,$src" %} 6784 opcode(0x0F,0x40); 6785 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6786 ins_pipe( pipe_cmov_mem ); 6787 %} 6788 6789 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ 6790 predicate(VM_Version::supports_cmov() ); 6791 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6792 ins_cost(250); 6793 expand %{ 6794 cmovI_memU(cop, cr, dst, src); 6795 %} 6796 %} 6797 6798 // Conditional move 6799 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6800 predicate(VM_Version::supports_cmov() ); 6801 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6802 ins_cost(200); 6803 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6804 opcode(0x0F,0x40); 6805 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6806 ins_pipe( pipe_cmov_reg ); 6807 %} 6808 6809 // Conditional move (non-P6 version) 6810 // Note: a CMoveP is generated for stubs and native wrappers 6811 // regardless of whether we are on a P6, so we 6812 // emulate a cmov here 6813 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6814 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6815 ins_cost(300); 6816 format %{ "Jn$cop skip\n\t" 6817 "MOV $dst,$src\t# pointer\n" 6818 "skip:" %} 6819 opcode(0x8b); 6820 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); 6821 ins_pipe( pipe_cmov_reg ); 6822 %} 6823 6824 // Conditional move 6825 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ 6826 predicate(VM_Version::supports_cmov() ); 6827 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6828 ins_cost(200); 6829 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6830 opcode(0x0F,0x40); 6831 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6832 ins_pipe( pipe_cmov_reg ); 6833 %} 6834 6835 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ 6836 
predicate(VM_Version::supports_cmov() ); 6837 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6838 ins_cost(200); 6839 expand %{ 6840 cmovP_regU(cop, cr, dst, src); 6841 %} 6842 %} 6843 6844 // DISABLED: Requires the ADLC to emit a bottom_type call that 6845 // correctly meets the two pointer arguments; one is an incoming 6846 // register but the other is a memory operand. ALSO appears to 6847 // be buggy with implicit null checks. 6848 // 6849 //// Conditional move 6850 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{ 6851 // predicate(VM_Version::supports_cmov() ); 6852 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6853 // ins_cost(250); 6854 // format %{ "CMOV$cop $dst,$src\t# ptr" %} 6855 // opcode(0x0F,0x40); 6856 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6857 // ins_pipe( pipe_cmov_mem ); 6858 //%} 6859 // 6860 //// Conditional move 6861 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{ 6862 // predicate(VM_Version::supports_cmov() ); 6863 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6864 // ins_cost(250); 6865 // format %{ "CMOV$cop $dst,$src\t# ptr" %} 6866 // opcode(0x0F,0x40); 6867 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6868 // ins_pipe( pipe_cmov_mem ); 6869 //%} 6870 6871 // Conditional move 6872 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ 6873 predicate(UseSSE<=1); 6874 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6875 ins_cost(200); 6876 format %{ "FCMOV$cop $dst,$src\t# double" %} 6877 opcode(0xDA); 6878 ins_encode( enc_cmov_dpr(cop,src) ); 6879 ins_pipe( pipe_cmovDPR_reg ); 6880 %} 6881 6882 // Conditional move 6883 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ 6884 predicate(UseSSE==0); 6885 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6886 ins_cost(200); 6887 format %{ "FCMOV$cop $dst,$src\t# float" %} 6888 opcode(0xDA); 
6889 ins_encode( enc_cmov_dpr(cop,src) ); 6890 ins_pipe( pipe_cmovDPR_reg ); 6891 %} 6892 6893 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6894 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ 6895 predicate(UseSSE<=1); 6896 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6897 ins_cost(200); 6898 format %{ "Jn$cop skip\n\t" 6899 "MOV $dst,$src\t# double\n" 6900 "skip:" %} 6901 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6902 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); 6903 ins_pipe( pipe_cmovDPR_reg ); 6904 %} 6905 6906 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6907 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ 6908 predicate(UseSSE==0); 6909 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6910 ins_cost(200); 6911 format %{ "Jn$cop skip\n\t" 6912 "MOV $dst,$src\t# float\n" 6913 "skip:" %} 6914 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6915 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); 6916 ins_pipe( pipe_cmovDPR_reg ); 6917 %} 6918 6919 // No CMOVE with SSE/SSE2 6920 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 6921 predicate (UseSSE>=1); 6922 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6923 ins_cost(200); 6924 format %{ "Jn$cop skip\n\t" 6925 "MOVSS $dst,$src\t# float\n" 6926 "skip:" %} 6927 ins_encode %{ 6928 Label skip; 6929 // Invert sense of branch from sense of CMOV 6930 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6931 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6932 __ bind(skip); 6933 %} 6934 ins_pipe( pipe_slow ); 6935 %} 6936 6937 // No CMOVE with SSE/SSE2 6938 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 6939 predicate (UseSSE>=2); 6940 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6941 ins_cost(200); 6942 format %{ "Jn$cop skip\n\t" 6943 "MOVSD $dst,$src\t# 
float\n" 6944 "skip:" %} 6945 ins_encode %{ 6946 Label skip; 6947 // Invert sense of branch from sense of CMOV 6948 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6949 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6950 __ bind(skip); 6951 %} 6952 ins_pipe( pipe_slow ); 6953 %} 6954 6955 // unsigned version 6956 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 6957 predicate (UseSSE>=1); 6958 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6959 ins_cost(200); 6960 format %{ "Jn$cop skip\n\t" 6961 "MOVSS $dst,$src\t# float\n" 6962 "skip:" %} 6963 ins_encode %{ 6964 Label skip; 6965 // Invert sense of branch from sense of CMOV 6966 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6967 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6968 __ bind(skip); 6969 %} 6970 ins_pipe( pipe_slow ); 6971 %} 6972 6973 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 6974 predicate (UseSSE>=1); 6975 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6976 ins_cost(200); 6977 expand %{ 6978 fcmovF_regU(cop, cr, dst, src); 6979 %} 6980 %} 6981 6982 // unsigned version 6983 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 6984 predicate (UseSSE>=2); 6985 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6986 ins_cost(200); 6987 format %{ "Jn$cop skip\n\t" 6988 "MOVSD $dst,$src\t# float\n" 6989 "skip:" %} 6990 ins_encode %{ 6991 Label skip; 6992 // Invert sense of branch from sense of CMOV 6993 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6994 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6995 __ bind(skip); 6996 %} 6997 ins_pipe( pipe_slow ); 6998 %} 6999 7000 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 7001 predicate (UseSSE>=2); 7002 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7003 ins_cost(200); 7004 expand %{ 7005 fcmovD_regU(cop, cr, dst, src); 7006 %} 7007 %} 7008 7009 instruct cmovL_reg(cmpOp cop, 
eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  // Long cmov is two 32-bit CMOVs: one for the low half, one for the high half.
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-compare variant of the long cmov.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Carry-flag-only compare variant; expands to the unsigned form.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add immediate to register; Con8or32 picks the sign-extended 8-bit form when it fits.
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add-one via the single-byte INC encoding (0x40+reg), gated on UseIncDec.
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: dst may differ from src0 and flags are untouched.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer version of the LEA add.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add of -1 via the single-byte DEC encoding (0x48+reg), gated on UseIncDec.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Add integer to pointer.
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add immediate to pointer.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add a value loaded from memory into a register.
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src"
%}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Add a register into a memory word (read-modify-write).
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Increment a memory word in place (FF /0).
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Decrement a memory word in place (FF /1).
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// CheckCastPP is a no-op at the machine level: size(0), empty encoding.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastPP is likewise a no-op; it only changes the compiler's type view.
instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastII is a no-op with zero cost.
instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}


// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS: CMPXCHG8B compares EDX:EAX with memory; res is 1 on success, 0 on failure.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS: CMPXCHG with EAX as the expected value.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte CAS.
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Short CAS.
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int CAS.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeL( eSIRegP
mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Compare-and-exchange returns the witnessed value (left in EAX by CMPXCHG).
instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddB whose result is unused: a locked ADD is cheaper than XADD.
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddS whose result is unused.
instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI whose result is unused.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
// XCHG with a memory operand is implicitly locked, so no lock prefix is emitted.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract immediate from register (81 /5, shortened to 83 /5 when it fits in 8 bits).
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32(
src ) );
  ins_pipe( ialu_reg );
%}

// Subtract a value loaded from memory.
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Subtract a register from a memory word (read-modify-write).
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negate: matches 0 - dst.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low word (EAX) of the EDX:EAX pair;
// feeds the high-word multiply rules below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Only applies when the long multiplicand is a constant that fits in 32 bits.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  // hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// Special-cases min_jint / -1 up front to avoid the IDIV overflow trap.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32(
eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    // Divide EDX:EAX by |imm| using two 32-bit unsigned divides, then
    // restore the sign at the end if the divisor was negative.
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    // Same two-step unsigned-divide scheme as divL_eReg_imm32, but the
    // remainder (left in EDX by DIV) is kept instead of the quotient.
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  // NOTE(review): pipe class is ialu_mem_imm for a register op — confirm intended.
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
opcode(0xD3, 0x7); /* D3 /7 */ 8079 ins_encode( OpcP, RegOpc( dst ) ); 8080 ins_pipe( ialu_reg_reg ); 8081 %} 8082 8083 // Logical shift right by one 8084 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8085 match(Set dst (URShiftI dst shift)); 8086 effect(KILL cr); 8087 8088 size(2); 8089 format %{ "SHR $dst,$shift" %} 8090 opcode(0xD1, 0x5); /* D1 /5 */ 8091 ins_encode( OpcP, RegOpc( dst ) ); 8092 ins_pipe( ialu_reg ); 8093 %} 8094 8095 // Logical Shift Right by 8-bit immediate 8096 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8097 match(Set dst (URShiftI dst shift)); 8098 effect(KILL cr); 8099 8100 size(3); 8101 format %{ "SHR $dst,$shift" %} 8102 opcode(0xC1, 0x5); /* C1 /5 ib */ 8103 ins_encode( RegOpcImm( dst, shift) ); 8104 ins_pipe( ialu_reg ); 8105 %} 8106 8107 8108 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 8109 // This idiom is used by the compiler for the i2b bytecode. 8110 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 8111 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 8112 8113 size(3); 8114 format %{ "MOVSX $dst,$src :8" %} 8115 ins_encode %{ 8116 __ movsbl($dst$$Register, $src$$Register); 8117 %} 8118 ins_pipe(ialu_reg_reg); 8119 %} 8120 8121 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 8122 // This idiom is used by the compiler the i2s bytecode. 
// Sign-extend the low 16 bits of $src into $dst (the i2s idiom:
// (src << 16) >> 16 is matched and emitted as a single MOVSX).
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
// Shift count lives in ECX, as required by the D3 /5 encoding.
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2 in one instruction (matches (src1 ^ -1) & src2).
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit (matches (0 - src) & src).
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to (and including) lowest set bit (matches (src + -1) ^ src).
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit (matches (src + -1) & src).
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// These expand-only instructs carry no match rule; they are emitted by the
// rolI_* matchers below.
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /* Opcode C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Only matches when the two shift counts sum to 0 mod 32, i.e. a true rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Only matches when the two shift counts sum to 0 mod 32, i.e. a true rotate.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// XOR with all-ones is a bitwise NOT; NOT does not write flags, so no KILL cr.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Expand-only helper: plain register copy used by convI2B below.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Expand-only helper: NEG sets CF iff dst was non-zero; ADC then yields
// 0 for a zero input and non-zero otherwise.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Expand-only helper: pointer flavor of movI_nocopy, used by convP2B.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Expand-only helper: same NEG/ADC trick as ci2b for pointer sources.
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// dst = (p < q) ? -1 : 0, computed branch-free via SETlt/NEG.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Note: removed an unused 'Label done' that was never bound or jumped to.
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// Special case: CmpLTMask of a value against zero is just an arithmetic
// shift of the sign bit across the whole register.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// CMP performs the same subtraction as SUB without writing op1, so the
// OF flag is produced while both inputs stay live.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// Long values occupy a register pair on x86_32 ($dst.lo/$dst.hi); the
// carry/borrow chains the low-half op into the high-half op.
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
8874 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8875 match(Set dst (SubL dst src)); 8876 effect(KILL cr); 8877 ins_cost(200); 8878 format %{ "SUB $dst.lo,$src.lo\n\t" 8879 "SBB $dst.hi,$src.hi" %} 8880 opcode(0x2B, 0x1B); 8881 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8882 ins_pipe( ialu_reg_reg_long ); 8883 %} 8884 8885 // Subtract Long Register with Immediate 8886 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8887 match(Set dst (SubL dst src)); 8888 effect(KILL cr); 8889 format %{ "SUB $dst.lo,$src.lo\n\t" 8890 "SBB $dst.hi,$src.hi" %} 8891 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8892 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8893 ins_pipe( ialu_reg_long ); 8894 %} 8895 8896 // Subtract Long Register with Memory 8897 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8898 match(Set dst (SubL dst (LoadL mem))); 8899 effect(KILL cr); 8900 ins_cost(125); 8901 format %{ "SUB $dst.lo,$mem\n\t" 8902 "SBB $dst.hi,$mem+4" %} 8903 opcode(0x2B, 0x1B); 8904 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8905 ins_pipe( ialu_reg_long_mem ); 8906 %} 8907 8908 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8909 match(Set dst (SubL zero dst)); 8910 effect(KILL cr); 8911 ins_cost(300); 8912 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8913 ins_encode( neg_long(dst) ); 8914 ins_pipe( ialu_reg_reg_long ); 8915 %} 8916 8917 // And Long Register with Register 8918 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8919 match(Set dst (AndL dst src)); 8920 effect(KILL cr); 8921 format %{ "AND $dst.lo,$src.lo\n\t" 8922 "AND $dst.hi,$src.hi" %} 8923 opcode(0x23,0x23); 8924 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8925 ins_pipe( ialu_reg_reg_long ); 8926 %} 8927 8928 // And Long Register with Immediate 8929 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8930 match(Set dst (AndL dst src)); 8931 effect(KILL 
cr); 8932 format %{ "AND $dst.lo,$src.lo\n\t" 8933 "AND $dst.hi,$src.hi" %} 8934 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8935 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8936 ins_pipe( ialu_reg_long ); 8937 %} 8938 8939 // And Long Register with Memory 8940 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8941 match(Set dst (AndL dst (LoadL mem))); 8942 effect(KILL cr); 8943 ins_cost(125); 8944 format %{ "AND $dst.lo,$mem\n\t" 8945 "AND $dst.hi,$mem+4" %} 8946 opcode(0x23, 0x23); 8947 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8948 ins_pipe( ialu_reg_long_mem ); 8949 %} 8950 8951 // BMI1 instructions 8952 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8953 match(Set dst (AndL (XorL src1 minus_1) src2)); 8954 predicate(UseBMI1Instructions); 8955 effect(KILL cr, TEMP dst); 8956 8957 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 8958 "ANDNL $dst.hi, $src1.hi, $src2.hi" 8959 %} 8960 8961 ins_encode %{ 8962 Register Rdst = $dst$$Register; 8963 Register Rsrc1 = $src1$$Register; 8964 Register Rsrc2 = $src2$$Register; 8965 __ andnl(Rdst, Rsrc1, Rsrc2); 8966 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 8967 %} 8968 ins_pipe(ialu_reg_reg_long); 8969 %} 8970 8971 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 8972 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 8973 predicate(UseBMI1Instructions); 8974 effect(KILL cr, TEMP dst); 8975 8976 ins_cost(125); 8977 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 8978 "ANDNL $dst.hi, $src1.hi, $src2+4" 8979 %} 8980 8981 ins_encode %{ 8982 Register Rdst = $dst$$Register; 8983 Register Rsrc1 = $src1$$Register; 8984 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 8985 8986 __ andnl(Rdst, Rsrc1, $src2$$Address); 8987 __ andnl(HIGH_FROM_LOW(Rdst), 
HIGH_FROM_LOW(Rsrc1), src2_hi); 8988 %} 8989 ins_pipe(ialu_reg_mem); 8990 %} 8991 8992 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 8993 match(Set dst (AndL (SubL imm_zero src) src)); 8994 predicate(UseBMI1Instructions); 8995 effect(KILL cr, TEMP dst); 8996 8997 format %{ "MOVL $dst.hi, 0\n\t" 8998 "BLSIL $dst.lo, $src.lo\n\t" 8999 "JNZ done\n\t" 9000 "BLSIL $dst.hi, $src.hi\n" 9001 "done:" 9002 %} 9003 9004 ins_encode %{ 9005 Label done; 9006 Register Rdst = $dst$$Register; 9007 Register Rsrc = $src$$Register; 9008 __ movl(HIGH_FROM_LOW(Rdst), 0); 9009 __ blsil(Rdst, Rsrc); 9010 __ jccb(Assembler::notZero, done); 9011 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9012 __ bind(done); 9013 %} 9014 ins_pipe(ialu_reg); 9015 %} 9016 9017 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 9018 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 9019 predicate(UseBMI1Instructions); 9020 effect(KILL cr, TEMP dst); 9021 9022 ins_cost(125); 9023 format %{ "MOVL $dst.hi, 0\n\t" 9024 "BLSIL $dst.lo, $src\n\t" 9025 "JNZ done\n\t" 9026 "BLSIL $dst.hi, $src+4\n" 9027 "done:" 9028 %} 9029 9030 ins_encode %{ 9031 Label done; 9032 Register Rdst = $dst$$Register; 9033 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9034 9035 __ movl(HIGH_FROM_LOW(Rdst), 0); 9036 __ blsil(Rdst, $src$$Address); 9037 __ jccb(Assembler::notZero, done); 9038 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 9039 __ bind(done); 9040 %} 9041 ins_pipe(ialu_reg_mem); 9042 %} 9043 9044 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9045 %{ 9046 match(Set dst (XorL (AddL src minus_1) src)); 9047 predicate(UseBMI1Instructions); 9048 effect(KILL cr, TEMP dst); 9049 9050 format %{ "MOVL $dst.hi, 0\n\t" 9051 "BLSMSKL $dst.lo, $src.lo\n\t" 9052 "JNC done\n\t" 9053 "BLSMSKL $dst.hi, $src.hi\n" 9054 "done:" 9055 %} 9056 9057 ins_encode %{ 9058 Label done; 
9059 Register Rdst = $dst$$Register; 9060 Register Rsrc = $src$$Register; 9061 __ movl(HIGH_FROM_LOW(Rdst), 0); 9062 __ blsmskl(Rdst, Rsrc); 9063 __ jccb(Assembler::carryClear, done); 9064 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9065 __ bind(done); 9066 %} 9067 9068 ins_pipe(ialu_reg); 9069 %} 9070 9071 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9072 %{ 9073 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 9074 predicate(UseBMI1Instructions); 9075 effect(KILL cr, TEMP dst); 9076 9077 ins_cost(125); 9078 format %{ "MOVL $dst.hi, 0\n\t" 9079 "BLSMSKL $dst.lo, $src\n\t" 9080 "JNC done\n\t" 9081 "BLSMSKL $dst.hi, $src+4\n" 9082 "done:" 9083 %} 9084 9085 ins_encode %{ 9086 Label done; 9087 Register Rdst = $dst$$Register; 9088 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9089 9090 __ movl(HIGH_FROM_LOW(Rdst), 0); 9091 __ blsmskl(Rdst, $src$$Address); 9092 __ jccb(Assembler::carryClear, done); 9093 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 9094 __ bind(done); 9095 %} 9096 9097 ins_pipe(ialu_reg_mem); 9098 %} 9099 9100 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9101 %{ 9102 match(Set dst (AndL (AddL src minus_1) src) ); 9103 predicate(UseBMI1Instructions); 9104 effect(KILL cr, TEMP dst); 9105 9106 format %{ "MOVL $dst.hi, $src.hi\n\t" 9107 "BLSRL $dst.lo, $src.lo\n\t" 9108 "JNC done\n\t" 9109 "BLSRL $dst.hi, $src.hi\n" 9110 "done:" 9111 %} 9112 9113 ins_encode %{ 9114 Label done; 9115 Register Rdst = $dst$$Register; 9116 Register Rsrc = $src$$Register; 9117 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9118 __ blsrl(Rdst, Rsrc); 9119 __ jccb(Assembler::carryClear, done); 9120 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9121 __ bind(done); 9122 %} 9123 9124 ins_pipe(ialu_reg); 9125 %} 9126 9127 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9128 %{ 9129 match(Set dst (AndL 
(AddL (LoadL src) minus_1) (LoadL src) )); 9130 predicate(UseBMI1Instructions); 9131 effect(KILL cr, TEMP dst); 9132 9133 ins_cost(125); 9134 format %{ "MOVL $dst.hi, $src+4\n\t" 9135 "BLSRL $dst.lo, $src\n\t" 9136 "JNC done\n\t" 9137 "BLSRL $dst.hi, $src+4\n" 9138 "done:" 9139 %} 9140 9141 ins_encode %{ 9142 Label done; 9143 Register Rdst = $dst$$Register; 9144 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9145 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9146 __ blsrl(Rdst, $src$$Address); 9147 __ jccb(Assembler::carryClear, done); 9148 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9149 __ bind(done); 9150 %} 9151 9152 ins_pipe(ialu_reg_mem); 9153 %} 9154 9155 // Or Long Register with Register 9156 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9157 match(Set dst (OrL dst src)); 9158 effect(KILL cr); 9159 format %{ "OR $dst.lo,$src.lo\n\t" 9160 "OR $dst.hi,$src.hi" %} 9161 opcode(0x0B,0x0B); 9162 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9163 ins_pipe( ialu_reg_reg_long ); 9164 %} 9165 9166 // Or Long Register with Immediate 9167 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9168 match(Set dst (OrL dst src)); 9169 effect(KILL cr); 9170 format %{ "OR $dst.lo,$src.lo\n\t" 9171 "OR $dst.hi,$src.hi" %} 9172 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9173 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9174 ins_pipe( ialu_reg_long ); 9175 %} 9176 9177 // Or Long Register with Memory 9178 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9179 match(Set dst (OrL dst (LoadL mem))); 9180 effect(KILL cr); 9181 ins_cost(125); 9182 format %{ "OR $dst.lo,$mem\n\t" 9183 "OR $dst.hi,$mem+4" %} 9184 opcode(0x0B,0x0B); 9185 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9186 ins_pipe( ialu_reg_long_mem ); 9187 %} 9188 9189 // Xor Long Register with Register 9190 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9191 
match(Set dst (XorL dst src)); 9192 effect(KILL cr); 9193 format %{ "XOR $dst.lo,$src.lo\n\t" 9194 "XOR $dst.hi,$src.hi" %} 9195 opcode(0x33,0x33); 9196 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9197 ins_pipe( ialu_reg_reg_long ); 9198 %} 9199 9200 // Xor Long Register with Immediate -1 9201 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9202 match(Set dst (XorL dst imm)); 9203 format %{ "NOT $dst.lo\n\t" 9204 "NOT $dst.hi" %} 9205 ins_encode %{ 9206 __ notl($dst$$Register); 9207 __ notl(HIGH_FROM_LOW($dst$$Register)); 9208 %} 9209 ins_pipe( ialu_reg_long ); 9210 %} 9211 9212 // Xor Long Register with Immediate 9213 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9214 match(Set dst (XorL dst src)); 9215 effect(KILL cr); 9216 format %{ "XOR $dst.lo,$src.lo\n\t" 9217 "XOR $dst.hi,$src.hi" %} 9218 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9219 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9220 ins_pipe( ialu_reg_long ); 9221 %} 9222 9223 // Xor Long Register with Memory 9224 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9225 match(Set dst (XorL dst (LoadL mem))); 9226 effect(KILL cr); 9227 ins_cost(125); 9228 format %{ "XOR $dst.lo,$mem\n\t" 9229 "XOR $dst.hi,$mem+4" %} 9230 opcode(0x33,0x33); 9231 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9232 ins_pipe( ialu_reg_long_mem ); 9233 %} 9234 9235 // Shift Left Long by 1 9236 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9237 predicate(UseNewLongLShift); 9238 match(Set dst (LShiftL dst cnt)); 9239 effect(KILL cr); 9240 ins_cost(100); 9241 format %{ "ADD $dst.lo,$dst.lo\n\t" 9242 "ADC $dst.hi,$dst.hi" %} 9243 ins_encode %{ 9244 __ addl($dst$$Register,$dst$$Register); 9245 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9246 %} 9247 ins_pipe( ialu_reg_long ); 9248 %} 9249 9250 // Shift Left Long by 2 9251 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9252 
predicate(UseNewLongLShift); 9253 match(Set dst (LShiftL dst cnt)); 9254 effect(KILL cr); 9255 ins_cost(100); 9256 format %{ "ADD $dst.lo,$dst.lo\n\t" 9257 "ADC $dst.hi,$dst.hi\n\t" 9258 "ADD $dst.lo,$dst.lo\n\t" 9259 "ADC $dst.hi,$dst.hi" %} 9260 ins_encode %{ 9261 __ addl($dst$$Register,$dst$$Register); 9262 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9263 __ addl($dst$$Register,$dst$$Register); 9264 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9265 %} 9266 ins_pipe( ialu_reg_long ); 9267 %} 9268 9269 // Shift Left Long by 3 9270 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9271 predicate(UseNewLongLShift); 9272 match(Set dst (LShiftL dst cnt)); 9273 effect(KILL cr); 9274 ins_cost(100); 9275 format %{ "ADD $dst.lo,$dst.lo\n\t" 9276 "ADC $dst.hi,$dst.hi\n\t" 9277 "ADD $dst.lo,$dst.lo\n\t" 9278 "ADC $dst.hi,$dst.hi\n\t" 9279 "ADD $dst.lo,$dst.lo\n\t" 9280 "ADC $dst.hi,$dst.hi" %} 9281 ins_encode %{ 9282 __ addl($dst$$Register,$dst$$Register); 9283 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9284 __ addl($dst$$Register,$dst$$Register); 9285 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9286 __ addl($dst$$Register,$dst$$Register); 9287 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9288 %} 9289 ins_pipe( ialu_reg_long ); 9290 %} 9291 9292 // Shift Left Long by 1-31 9293 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9294 match(Set dst (LShiftL dst cnt)); 9295 effect(KILL cr); 9296 ins_cost(200); 9297 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9298 "SHL $dst.lo,$cnt" %} 9299 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9300 ins_encode( move_long_small_shift(dst,cnt) ); 9301 ins_pipe( ialu_reg_long ); 9302 %} 9303 9304 // Shift Left Long by 32-63 9305 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9306 match(Set dst (LShiftL dst cnt)); 9307 effect(KILL cr); 9308 ins_cost(300); 9309 
format %{ "MOV $dst.hi,$dst.lo\n" 9310 "\tSHL $dst.hi,$cnt-32\n" 9311 "\tXOR $dst.lo,$dst.lo" %} 9312 opcode(0xC1, 0x4); /* C1 /4 ib */ 9313 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9314 ins_pipe( ialu_reg_long ); 9315 %} 9316 9317 // Shift Left Long by variable 9318 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9319 match(Set dst (LShiftL dst shift)); 9320 effect(KILL cr); 9321 ins_cost(500+200); 9322 size(17); 9323 format %{ "TEST $shift,32\n\t" 9324 "JEQ,s small\n\t" 9325 "MOV $dst.hi,$dst.lo\n\t" 9326 "XOR $dst.lo,$dst.lo\n" 9327 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9328 "SHL $dst.lo,$shift" %} 9329 ins_encode( shift_left_long( dst, shift ) ); 9330 ins_pipe( pipe_slow ); 9331 %} 9332 9333 // Shift Right Long by 1-31 9334 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9335 match(Set dst (URShiftL dst cnt)); 9336 effect(KILL cr); 9337 ins_cost(200); 9338 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9339 "SHR $dst.hi,$cnt" %} 9340 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9341 ins_encode( move_long_small_shift(dst,cnt) ); 9342 ins_pipe( ialu_reg_long ); 9343 %} 9344 9345 // Shift Right Long by 32-63 9346 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9347 match(Set dst (URShiftL dst cnt)); 9348 effect(KILL cr); 9349 ins_cost(300); 9350 format %{ "MOV $dst.lo,$dst.hi\n" 9351 "\tSHR $dst.lo,$cnt-32\n" 9352 "\tXOR $dst.hi,$dst.hi" %} 9353 opcode(0xC1, 0x5); /* C1 /5 ib */ 9354 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9355 ins_pipe( ialu_reg_long ); 9356 %} 9357 9358 // Shift Right Long by variable 9359 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9360 match(Set dst (URShiftL dst shift)); 9361 effect(KILL cr); 9362 ins_cost(600); 9363 size(17); 9364 format %{ "TEST $shift,32\n\t" 9365 "JEQ,s small\n\t" 9366 "MOV $dst.lo,$dst.hi\n\t" 9367 "XOR $dst.hi,$dst.hi\n" 9368 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9369 "SHR $dst.hi,$shift" %} 9370 ins_encode( 
shift_right_long( dst, shift ) ); 9371 ins_pipe( pipe_slow ); 9372 %} 9373 9374 // Shift Right Long by 1-31 9375 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9376 match(Set dst (RShiftL dst cnt)); 9377 effect(KILL cr); 9378 ins_cost(200); 9379 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9380 "SAR $dst.hi,$cnt" %} 9381 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9382 ins_encode( move_long_small_shift(dst,cnt) ); 9383 ins_pipe( ialu_reg_long ); 9384 %} 9385 9386 // Shift Right Long by 32-63 9387 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9388 match(Set dst (RShiftL dst cnt)); 9389 effect(KILL cr); 9390 ins_cost(300); 9391 format %{ "MOV $dst.lo,$dst.hi\n" 9392 "\tSAR $dst.lo,$cnt-32\n" 9393 "\tSAR $dst.hi,31" %} 9394 opcode(0xC1, 0x7); /* C1 /7 ib */ 9395 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9396 ins_pipe( ialu_reg_long ); 9397 %} 9398 9399 // Shift Right arithmetic Long by variable 9400 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9401 match(Set dst (RShiftL dst shift)); 9402 effect(KILL cr); 9403 ins_cost(600); 9404 size(18); 9405 format %{ "TEST $shift,32\n\t" 9406 "JEQ,s small\n\t" 9407 "MOV $dst.lo,$dst.hi\n\t" 9408 "SAR $dst.hi,31\n" 9409 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9410 "SAR $dst.hi,$shift" %} 9411 ins_encode( shift_right_arith_long( dst, shift ) ); 9412 ins_pipe( pipe_slow ); 9413 %} 9414 9415 9416 //----------Double Instructions------------------------------------------------ 9417 // Double Math 9418 9419 // Compare & branch 9420 9421 // P6 version of float compare, sets condition codes in EFLAGS 9422 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9423 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9424 match(Set cr (CmpD src1 src2)); 9425 effect(KILL rax); 9426 ins_cost(150); 9427 format %{ "FLD $src1\n\t" 9428 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9429 "JNP exit\n\t" 9430 "MOV ah,1 // saw a NaN, set CF\n\t" 9431 
"SAHF\n" 9432 "exit:\tNOP // avoid branch to branch" %} 9433 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9434 ins_encode( Push_Reg_DPR(src1), 9435 OpcP, RegOpc(src2), 9436 cmpF_P6_fixup ); 9437 ins_pipe( pipe_slow ); 9438 %} 9439 9440 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9441 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9442 match(Set cr (CmpD src1 src2)); 9443 ins_cost(150); 9444 format %{ "FLD $src1\n\t" 9445 "FUCOMIP ST,$src2 // P6 instruction" %} 9446 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9447 ins_encode( Push_Reg_DPR(src1), 9448 OpcP, RegOpc(src2)); 9449 ins_pipe( pipe_slow ); 9450 %} 9451 9452 // Compare & branch 9453 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9454 predicate(UseSSE<=1); 9455 match(Set cr (CmpD src1 src2)); 9456 effect(KILL rax); 9457 ins_cost(200); 9458 format %{ "FLD $src1\n\t" 9459 "FCOMp $src2\n\t" 9460 "FNSTSW AX\n\t" 9461 "TEST AX,0x400\n\t" 9462 "JZ,s flags\n\t" 9463 "MOV AH,1\t# unordered treat as LT\n" 9464 "flags:\tSAHF" %} 9465 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9466 ins_encode( Push_Reg_DPR(src1), 9467 OpcP, RegOpc(src2), 9468 fpu_flags); 9469 ins_pipe( pipe_slow ); 9470 %} 9471 9472 // Compare vs zero into -1,0,1 9473 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9474 predicate(UseSSE<=1); 9475 match(Set dst (CmpD3 src1 zero)); 9476 effect(KILL cr, KILL rax); 9477 ins_cost(280); 9478 format %{ "FTSTD $dst,$src1" %} 9479 opcode(0xE4, 0xD9); 9480 ins_encode( Push_Reg_DPR(src1), 9481 OpcS, OpcP, PopFPU, 9482 CmpF_Result(dst)); 9483 ins_pipe( pipe_slow ); 9484 %} 9485 9486 // Compare into -1,0,1 9487 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9488 predicate(UseSSE<=1); 9489 match(Set dst (CmpD3 src1 src2)); 9490 effect(KILL cr, KILL rax); 9491 ins_cost(300); 9492 format %{ "FCMPD $dst,$src1,$src2" %} 9493 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9494 ins_encode( 
Push_Reg_DPR(src1), 9495 OpcP, RegOpc(src2), 9496 CmpF_Result(dst)); 9497 ins_pipe( pipe_slow ); 9498 %} 9499 9500 // float compare and set condition codes in EFLAGS by XMM regs 9501 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9502 predicate(UseSSE>=2); 9503 match(Set cr (CmpD src1 src2)); 9504 ins_cost(145); 9505 format %{ "UCOMISD $src1,$src2\n\t" 9506 "JNP,s exit\n\t" 9507 "PUSHF\t# saw NaN, set CF\n\t" 9508 "AND [rsp], #0xffffff2b\n\t" 9509 "POPF\n" 9510 "exit:" %} 9511 ins_encode %{ 9512 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9513 emit_cmpfp_fixup(_masm); 9514 %} 9515 ins_pipe( pipe_slow ); 9516 %} 9517 9518 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9519 predicate(UseSSE>=2); 9520 match(Set cr (CmpD src1 src2)); 9521 ins_cost(100); 9522 format %{ "UCOMISD $src1,$src2" %} 9523 ins_encode %{ 9524 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9525 %} 9526 ins_pipe( pipe_slow ); 9527 %} 9528 9529 // float compare and set condition codes in EFLAGS by XMM regs 9530 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9531 predicate(UseSSE>=2); 9532 match(Set cr (CmpD src1 (LoadD src2))); 9533 ins_cost(145); 9534 format %{ "UCOMISD $src1,$src2\n\t" 9535 "JNP,s exit\n\t" 9536 "PUSHF\t# saw NaN, set CF\n\t" 9537 "AND [rsp], #0xffffff2b\n\t" 9538 "POPF\n" 9539 "exit:" %} 9540 ins_encode %{ 9541 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9542 emit_cmpfp_fixup(_masm); 9543 %} 9544 ins_pipe( pipe_slow ); 9545 %} 9546 9547 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9548 predicate(UseSSE>=2); 9549 match(Set cr (CmpD src1 (LoadD src2))); 9550 ins_cost(100); 9551 format %{ "UCOMISD $src1,$src2" %} 9552 ins_encode %{ 9553 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9554 %} 9555 ins_pipe( pipe_slow ); 9556 %} 9557 9558 // Compare into -1,0,1 in XMM 9559 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9560 predicate(UseSSE>=2); 9561 match(Set dst (CmpD3 src1 src2)); 
9562 effect(KILL cr); 9563 ins_cost(255); 9564 format %{ "UCOMISD $src1, $src2\n\t" 9565 "MOV $dst, #-1\n\t" 9566 "JP,s done\n\t" 9567 "JB,s done\n\t" 9568 "SETNE $dst\n\t" 9569 "MOVZB $dst, $dst\n" 9570 "done:" %} 9571 ins_encode %{ 9572 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9573 emit_cmpfp3(_masm, $dst$$Register); 9574 %} 9575 ins_pipe( pipe_slow ); 9576 %} 9577 9578 // Compare into -1,0,1 in XMM and memory 9579 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9580 predicate(UseSSE>=2); 9581 match(Set dst (CmpD3 src1 (LoadD src2))); 9582 effect(KILL cr); 9583 ins_cost(275); 9584 format %{ "UCOMISD $src1, $src2\n\t" 9585 "MOV $dst, #-1\n\t" 9586 "JP,s done\n\t" 9587 "JB,s done\n\t" 9588 "SETNE $dst\n\t" 9589 "MOVZB $dst, $dst\n" 9590 "done:" %} 9591 ins_encode %{ 9592 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9593 emit_cmpfp3(_masm, $dst$$Register); 9594 %} 9595 ins_pipe( pipe_slow ); 9596 %} 9597 9598 9599 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9600 predicate (UseSSE <=1); 9601 match(Set dst (SubD dst src)); 9602 9603 format %{ "FLD $src\n\t" 9604 "DSUBp $dst,ST" %} 9605 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9606 ins_cost(150); 9607 ins_encode( Push_Reg_DPR(src), 9608 OpcP, RegOpc(dst) ); 9609 ins_pipe( fpu_reg_reg ); 9610 %} 9611 9612 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9613 predicate (UseSSE <=1); 9614 match(Set dst (RoundDouble (SubD src1 src2))); 9615 ins_cost(250); 9616 9617 format %{ "FLD $src2\n\t" 9618 "DSUB ST,$src1\n\t" 9619 "FSTP_D $dst\t# D-round" %} 9620 opcode(0xD8, 0x5); 9621 ins_encode( Push_Reg_DPR(src2), 9622 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9623 ins_pipe( fpu_mem_reg_reg ); 9624 %} 9625 9626 9627 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9628 predicate (UseSSE <=1); 9629 match(Set dst (SubD dst (LoadD src))); 9630 ins_cost(150); 9631 9632 format %{ "FLD $src\n\t" 9633 "DSUBp $dst,ST" %} 9634 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9635 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9636 OpcP, RegOpc(dst) ); 9637 ins_pipe( fpu_reg_mem ); 9638 %} 9639 9640 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9641 predicate (UseSSE<=1); 9642 match(Set dst (AbsD src)); 9643 ins_cost(100); 9644 format %{ "FABS" %} 9645 opcode(0xE1, 0xD9); 9646 ins_encode( OpcS, OpcP ); 9647 ins_pipe( fpu_reg_reg ); 9648 %} 9649 9650 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9651 predicate(UseSSE<=1); 9652 match(Set dst (NegD src)); 9653 ins_cost(100); 9654 format %{ "FCHS" %} 9655 opcode(0xE0, 0xD9); 9656 ins_encode( OpcS, OpcP ); 9657 ins_pipe( fpu_reg_reg ); 9658 %} 9659 9660 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9661 predicate(UseSSE<=1); 9662 match(Set dst (AddD dst src)); 9663 format %{ "FLD $src\n\t" 9664 "DADD $dst,ST" %} 9665 size(4); 9666 ins_cost(150); 9667 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9668 ins_encode( Push_Reg_DPR(src), 9669 OpcP, RegOpc(dst) ); 9670 ins_pipe( fpu_reg_reg ); 9671 %} 9672 9673 9674 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9675 predicate(UseSSE<=1); 9676 match(Set dst (RoundDouble (AddD src1 src2))); 9677 ins_cost(250); 9678 9679 format %{ "FLD $src2\n\t" 9680 "DADD ST,$src1\n\t" 9681 "FSTP_D $dst\t# D-round" %} 9682 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9683 ins_encode( Push_Reg_DPR(src2), 9684 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9685 ins_pipe( fpu_mem_reg_reg ); 9686 %} 9687 9688 9689 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9690 predicate(UseSSE<=1); 9691 match(Set dst (AddD dst (LoadD src))); 9692 ins_cost(150); 9693 9694 format %{ "FLD $src\n\t" 9695 "DADDp $dst,ST" %} 9696 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9697 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9698 OpcP, RegOpc(dst) ); 9699 ins_pipe( fpu_reg_mem ); 9700 %} 9701 9702 // add-to-memory 9703 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9704 predicate(UseSSE<=1); 9705 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9706 ins_cost(150); 9707 9708 format %{ "FLD_D $dst\n\t" 9709 "DADD ST,$src\n\t" 9710 "FST_D $dst" %} 9711 opcode(0xDD, 0x0); 9712 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9713 Opcode(0xD8), RegOpc(src), 9714 set_instruction_start, 9715 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9716 ins_pipe( fpu_reg_mem ); 9717 %} 9718 9719 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9720 predicate(UseSSE<=1); 9721 match(Set dst (AddD dst con)); 9722 ins_cost(125); 9723 format %{ "FLD1\n\t" 9724 "DADDp $dst,ST" %} 9725 ins_encode %{ 9726 __ fld1(); 9727 __ faddp($dst$$reg); 9728 %} 9729 ins_pipe(fpu_reg); 9730 %} 9731 9732 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9733 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9734 match(Set dst (AddD dst con)); 9735 ins_cost(200); 9736 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9737 "DADDp $dst,ST" %} 9738 ins_encode %{ 9739 __ fld_d($constantaddress($con)); 9740 __ faddp($dst$$reg); 9741 %} 9742 ins_pipe(fpu_reg_mem); 9743 %} 9744 9745 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9746 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9747 match(Set dst (RoundDouble (AddD src con))); 9748 ins_cost(200); 9749 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9750 "DADD ST,$src\n\t" 9751 "FSTP_D $dst\t# D-round" %} 9752 ins_encode %{ 9753 __ fld_d($constantaddress($con)); 9754 __ fadd($src$$reg); 9755 __ fstp_d(Address(rsp, $dst$$disp)); 9756 %} 9757 ins_pipe(fpu_mem_reg_con); 9758 %} 9759 9760 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9761 predicate(UseSSE<=1); 9762 match(Set dst (MulD dst src)); 9763 format %{ "FLD $src\n\t" 9764 "DMULp $dst,ST" %} 9765 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9766 ins_cost(150); 9767 ins_encode( Push_Reg_DPR(src), 9768 OpcP, RegOpc(dst) ); 9769 ins_pipe( 
fpu_reg_reg ); 9770 %} 9771 9772 // Strict FP instruction biases argument before multiply then 9773 // biases result to avoid double rounding of subnormals. 9774 // 9775 // scale arg1 by multiplying arg1 by 2^(-15360) 9776 // load arg2 9777 // multiply scaled arg1 by arg2 9778 // rescale product by 2^(15360) 9779 // 9780 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9781 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9782 match(Set dst (MulD dst src)); 9783 ins_cost(1); // Select this instruction for all strict FP double multiplies 9784 9785 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9786 "DMULp $dst,ST\n\t" 9787 "FLD $src\n\t" 9788 "DMULp $dst,ST\n\t" 9789 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9790 "DMULp $dst,ST\n\t" %} 9791 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9792 ins_encode( strictfp_bias1(dst), 9793 Push_Reg_DPR(src), 9794 OpcP, RegOpc(dst), 9795 strictfp_bias2(dst) ); 9796 ins_pipe( fpu_reg_reg ); 9797 %} 9798 9799 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9800 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9801 match(Set dst (MulD dst con)); 9802 ins_cost(200); 9803 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9804 "DMULp $dst,ST" %} 9805 ins_encode %{ 9806 __ fld_d($constantaddress($con)); 9807 __ fmulp($dst$$reg); 9808 %} 9809 ins_pipe(fpu_reg_mem); 9810 %} 9811 9812 9813 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9814 predicate( UseSSE<=1 ); 9815 match(Set dst (MulD dst (LoadD src))); 9816 ins_cost(200); 9817 format %{ "FLD_D $src\n\t" 9818 "DMULp $dst,ST" %} 9819 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9820 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9821 OpcP, RegOpc(dst) ); 9822 ins_pipe( fpu_reg_mem ); 9823 %} 9824 9825 // 9826 // Cisc-alternate to reg-reg multiply 9827 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9828 predicate( UseSSE<=1 ); 9829 match(Set dst (MulD src (LoadD mem))); 9830 ins_cost(250); 9831 format %{ "FLD_D $mem\n\t" 9832 "DMUL ST,$src\n\t" 9833 "FSTP_D $dst" %} 9834 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9835 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9836 OpcReg_FPR(src), 9837 Pop_Reg_DPR(dst) ); 9838 ins_pipe( fpu_reg_reg_mem ); 9839 %} 9840 9841 9842 // MACRO3 -- addDPR a mulDPR 9843 // This instruction is a '2-address' instruction in that the result goes 9844 // back to src2. This eliminates a move from the macro; possibly the 9845 // register allocator will have to add it back (and maybe not). 9846 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9847 predicate( UseSSE<=1 ); 9848 match(Set src2 (AddD (MulD src0 src1) src2)); 9849 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9850 "DMUL ST,$src1\n\t" 9851 "DADDp $src2,ST" %} 9852 ins_cost(250); 9853 opcode(0xDD); /* LoadD DD /0 */ 9854 ins_encode( Push_Reg_FPR(src0), 9855 FMul_ST_reg(src1), 9856 FAddP_reg_ST(src2) ); 9857 ins_pipe( fpu_reg_reg_reg ); 9858 %} 9859 9860 9861 // MACRO3 -- subDPR a mulDPR 9862 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9863 predicate( UseSSE<=1 ); 9864 match(Set src2 (SubD (MulD src0 src1) src2)); 9865 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9866 "DMUL ST,$src1\n\t" 9867 "DSUBRp $src2,ST" %} 9868 ins_cost(250); 9869 ins_encode( Push_Reg_FPR(src0), 9870 FMul_ST_reg(src1), 9871 Opcode(0xDE), Opc_plus(0xE0,src2)); 9872 ins_pipe( fpu_reg_reg_reg ); 9873 %} 9874 9875 9876 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9877 predicate( UseSSE<=1 ); 9878 match(Set dst (DivD dst src)); 9879 9880 format %{ "FLD $src\n\t" 9881 "FDIVp $dst,ST" %} 9882 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9883 ins_cost(150); 9884 ins_encode( Push_Reg_DPR(src), 9885 OpcP, RegOpc(dst) ); 9886 ins_pipe( fpu_reg_reg ); 9887 %} 9888 9889 // Strict FP instruction biases argument before division then 9890 // biases 
result, to avoid double rounding of subnormals. 9891 // 9892 // scale dividend by multiplying dividend by 2^(-15360) 9893 // load divisor 9894 // divide scaled dividend by divisor 9895 // rescale quotient by 2^(15360) 9896 // 9897 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9898 predicate (UseSSE<=1); 9899 match(Set dst (DivD dst src)); 9900 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9901 ins_cost(01); 9902 9903 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9904 "DMULp $dst,ST\n\t" 9905 "FLD $src\n\t" 9906 "FDIVp $dst,ST\n\t" 9907 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9908 "DMULp $dst,ST\n\t" %} 9909 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9910 ins_encode( strictfp_bias1(dst), 9911 Push_Reg_DPR(src), 9912 OpcP, RegOpc(dst), 9913 strictfp_bias2(dst) ); 9914 ins_pipe( fpu_reg_reg ); 9915 %} 9916 9917 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9918 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 9919 match(Set dst (RoundDouble (DivD src1 src2))); 9920 9921 format %{ "FLD $src1\n\t" 9922 "FDIV ST,$src2\n\t" 9923 "FSTP_D $dst\t# D-round" %} 9924 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 9925 ins_encode( Push_Reg_DPR(src1), 9926 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); 9927 ins_pipe( fpu_mem_reg_reg ); 9928 %} 9929 9930 9931 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 9932 predicate(UseSSE<=1); 9933 match(Set dst (ModD dst src)); 9934 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 9935 9936 format %{ "DMOD $dst,$src" %} 9937 ins_cost(250); 9938 ins_encode(Push_Reg_Mod_DPR(dst, src), 9939 emitModDPR(), 9940 Push_Result_Mod_DPR(src), 9941 Pop_Reg_DPR(dst)); 9942 ins_pipe( pipe_slow ); 9943 %} 9944 9945 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 9946 predicate(UseSSE>=2); 9947 match(Set dst (ModD src0 src1)); 
9948 effect(KILL rax, KILL cr); 9949 9950 format %{ "SUB ESP,8\t # DMOD\n" 9951 "\tMOVSD [ESP+0],$src1\n" 9952 "\tFLD_D [ESP+0]\n" 9953 "\tMOVSD [ESP+0],$src0\n" 9954 "\tFLD_D [ESP+0]\n" 9955 "loop:\tFPREM\n" 9956 "\tFWAIT\n" 9957 "\tFNSTSW AX\n" 9958 "\tSAHF\n" 9959 "\tJP loop\n" 9960 "\tFSTP_D [ESP+0]\n" 9961 "\tMOVSD $dst,[ESP+0]\n" 9962 "\tADD ESP,8\n" 9963 "\tFSTP ST0\t # Restore FPU Stack" 9964 %} 9965 ins_cost(250); 9966 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9967 ins_pipe( pipe_slow ); 9968 %} 9969 9970 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9971 predicate (UseSSE<=1); 9972 match(Set dst(AtanD dst src)); 9973 format %{ "DATA $dst,$src" %} 9974 opcode(0xD9, 0xF3); 9975 ins_encode( Push_Reg_DPR(src), 9976 OpcP, OpcS, RegOpc(dst) ); 9977 ins_pipe( pipe_slow ); 9978 %} 9979 9980 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9981 predicate (UseSSE>=2); 9982 match(Set dst(AtanD dst src)); 9983 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9984 format %{ "DATA $dst,$src" %} 9985 opcode(0xD9, 0xF3); 9986 ins_encode( Push_SrcD(src), 9987 OpcP, OpcS, Push_ResultD(dst) ); 9988 ins_pipe( pipe_slow ); 9989 %} 9990 9991 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9992 predicate (UseSSE<=1); 9993 match(Set dst (SqrtD src)); 9994 format %{ "DSQRT $dst,$src" %} 9995 opcode(0xFA, 0xD9); 9996 ins_encode( Push_Reg_DPR(src), 9997 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9998 ins_pipe( pipe_slow ); 9999 %} 10000 10001 //-------------Float Instructions------------------------------- 10002 // Float Math 10003 10004 // Code for float compare: 10005 // fcompp(); 10006 // fwait(); fnstsw_ax(); 10007 // sahf(); 10008 // movl(dst, unordered_result); 10009 // jcc(Assembler::parity, exit); 10010 // movl(dst, less_result); 10011 // jcc(Assembler::below, exit); 10012 // movl(dst, equal_result); 10013 // jcc(Assembler::equal, exit); 10014 // movl(dst, greater_result); 10015 // exit: 10016 10017 // P6 version of 
float compare, sets condition codes in EFLAGS 10018 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10019 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10020 match(Set cr (CmpF src1 src2)); 10021 effect(KILL rax); 10022 ins_cost(150); 10023 format %{ "FLD $src1\n\t" 10024 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10025 "JNP exit\n\t" 10026 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10027 "SAHF\n" 10028 "exit:\tNOP // avoid branch to branch" %} 10029 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10030 ins_encode( Push_Reg_DPR(src1), 10031 OpcP, RegOpc(src2), 10032 cmpF_P6_fixup ); 10033 ins_pipe( pipe_slow ); 10034 %} 10035 10036 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10037 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10038 match(Set cr (CmpF src1 src2)); 10039 ins_cost(100); 10040 format %{ "FLD $src1\n\t" 10041 "FUCOMIP ST,$src2 // P6 instruction" %} 10042 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10043 ins_encode( Push_Reg_DPR(src1), 10044 OpcP, RegOpc(src2)); 10045 ins_pipe( pipe_slow ); 10046 %} 10047 10048 10049 // Compare & branch 10050 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10051 predicate(UseSSE == 0); 10052 match(Set cr (CmpF src1 src2)); 10053 effect(KILL rax); 10054 ins_cost(200); 10055 format %{ "FLD $src1\n\t" 10056 "FCOMp $src2\n\t" 10057 "FNSTSW AX\n\t" 10058 "TEST AX,0x400\n\t" 10059 "JZ,s flags\n\t" 10060 "MOV AH,1\t# unordered treat as LT\n" 10061 "flags:\tSAHF" %} 10062 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10063 ins_encode( Push_Reg_DPR(src1), 10064 OpcP, RegOpc(src2), 10065 fpu_flags); 10066 ins_pipe( pipe_slow ); 10067 %} 10068 10069 // Compare vs zero into -1,0,1 10070 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10071 predicate(UseSSE == 0); 10072 match(Set dst (CmpF3 src1 zero)); 10073 effect(KILL cr, KILL rax); 10074 ins_cost(280); 10075 format %{ "FTSTF $dst,$src1" %} 10076 
opcode(0xE4, 0xD9); 10077 ins_encode( Push_Reg_DPR(src1), 10078 OpcS, OpcP, PopFPU, 10079 CmpF_Result(dst)); 10080 ins_pipe( pipe_slow ); 10081 %} 10082 10083 // Compare into -1,0,1 10084 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10085 predicate(UseSSE == 0); 10086 match(Set dst (CmpF3 src1 src2)); 10087 effect(KILL cr, KILL rax); 10088 ins_cost(300); 10089 format %{ "FCMPF $dst,$src1,$src2" %} 10090 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10091 ins_encode( Push_Reg_DPR(src1), 10092 OpcP, RegOpc(src2), 10093 CmpF_Result(dst)); 10094 ins_pipe( pipe_slow ); 10095 %} 10096 10097 // float compare and set condition codes in EFLAGS by XMM regs 10098 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10099 predicate(UseSSE>=1); 10100 match(Set cr (CmpF src1 src2)); 10101 ins_cost(145); 10102 format %{ "UCOMISS $src1,$src2\n\t" 10103 "JNP,s exit\n\t" 10104 "PUSHF\t# saw NaN, set CF\n\t" 10105 "AND [rsp], #0xffffff2b\n\t" 10106 "POPF\n" 10107 "exit:" %} 10108 ins_encode %{ 10109 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10110 emit_cmpfp_fixup(_masm); 10111 %} 10112 ins_pipe( pipe_slow ); 10113 %} 10114 10115 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10116 predicate(UseSSE>=1); 10117 match(Set cr (CmpF src1 src2)); 10118 ins_cost(100); 10119 format %{ "UCOMISS $src1,$src2" %} 10120 ins_encode %{ 10121 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10122 %} 10123 ins_pipe( pipe_slow ); 10124 %} 10125 10126 // float compare and set condition codes in EFLAGS by XMM regs 10127 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10128 predicate(UseSSE>=1); 10129 match(Set cr (CmpF src1 (LoadF src2))); 10130 ins_cost(165); 10131 format %{ "UCOMISS $src1,$src2\n\t" 10132 "JNP,s exit\n\t" 10133 "PUSHF\t# saw NaN, set CF\n\t" 10134 "AND [rsp], #0xffffff2b\n\t" 10135 "POPF\n" 10136 "exit:" %} 10137 ins_encode %{ 10138 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10139 
emit_cmpfp_fixup(_masm); 10140 %} 10141 ins_pipe( pipe_slow ); 10142 %} 10143 10144 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10145 predicate(UseSSE>=1); 10146 match(Set cr (CmpF src1 (LoadF src2))); 10147 ins_cost(100); 10148 format %{ "UCOMISS $src1,$src2" %} 10149 ins_encode %{ 10150 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10151 %} 10152 ins_pipe( pipe_slow ); 10153 %} 10154 10155 // Compare into -1,0,1 in XMM 10156 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10157 predicate(UseSSE>=1); 10158 match(Set dst (CmpF3 src1 src2)); 10159 effect(KILL cr); 10160 ins_cost(255); 10161 format %{ "UCOMISS $src1, $src2\n\t" 10162 "MOV $dst, #-1\n\t" 10163 "JP,s done\n\t" 10164 "JB,s done\n\t" 10165 "SETNE $dst\n\t" 10166 "MOVZB $dst, $dst\n" 10167 "done:" %} 10168 ins_encode %{ 10169 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10170 emit_cmpfp3(_masm, $dst$$Register); 10171 %} 10172 ins_pipe( pipe_slow ); 10173 %} 10174 10175 // Compare into -1,0,1 in XMM and memory 10176 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10177 predicate(UseSSE>=1); 10178 match(Set dst (CmpF3 src1 (LoadF src2))); 10179 effect(KILL cr); 10180 ins_cost(275); 10181 format %{ "UCOMISS $src1, $src2\n\t" 10182 "MOV $dst, #-1\n\t" 10183 "JP,s done\n\t" 10184 "JB,s done\n\t" 10185 "SETNE $dst\n\t" 10186 "MOVZB $dst, $dst\n" 10187 "done:" %} 10188 ins_encode %{ 10189 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10190 emit_cmpfp3(_masm, $dst$$Register); 10191 %} 10192 ins_pipe( pipe_slow ); 10193 %} 10194 10195 // Spill to obtain 24-bit precision 10196 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10197 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10198 match(Set dst (SubF src1 src2)); 10199 10200 format %{ "FSUB $dst,$src1 - $src2" %} 10201 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10202 ins_encode( Push_Reg_FPR(src1), 10203 OpcReg_FPR(src2), 
10204 Pop_Mem_FPR(dst) ); 10205 ins_pipe( fpu_mem_reg_reg ); 10206 %} 10207 // 10208 // This instruction does not round to 24-bits 10209 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10210 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10211 match(Set dst (SubF dst src)); 10212 10213 format %{ "FSUB $dst,$src" %} 10214 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10215 ins_encode( Push_Reg_FPR(src), 10216 OpcP, RegOpc(dst) ); 10217 ins_pipe( fpu_reg_reg ); 10218 %} 10219 10220 // Spill to obtain 24-bit precision 10221 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10222 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10223 match(Set dst (AddF src1 src2)); 10224 10225 format %{ "FADD $dst,$src1,$src2" %} 10226 opcode(0xD8, 0x0); /* D8 C0+i */ 10227 ins_encode( Push_Reg_FPR(src2), 10228 OpcReg_FPR(src1), 10229 Pop_Mem_FPR(dst) ); 10230 ins_pipe( fpu_mem_reg_reg ); 10231 %} 10232 // 10233 // This instruction does not round to 24-bits 10234 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10235 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10236 match(Set dst (AddF dst src)); 10237 10238 format %{ "FLD $src\n\t" 10239 "FADDp $dst,ST" %} 10240 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10241 ins_encode( Push_Reg_FPR(src), 10242 OpcP, RegOpc(dst) ); 10243 ins_pipe( fpu_reg_reg ); 10244 %} 10245 10246 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10247 predicate(UseSSE==0); 10248 match(Set dst (AbsF src)); 10249 ins_cost(100); 10250 format %{ "FABS" %} 10251 opcode(0xE1, 0xD9); 10252 ins_encode( OpcS, OpcP ); 10253 ins_pipe( fpu_reg_reg ); 10254 %} 10255 10256 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10257 predicate(UseSSE==0); 10258 match(Set dst (NegF src)); 10259 ins_cost(100); 10260 format %{ "FCHS" %} 10261 opcode(0xE0, 0xD9); 10262 ins_encode( OpcS, OpcP ); 10263 ins_pipe( fpu_reg_reg ); 10264 %} 10265 10266 // Cisc-alternate to addFPR_reg 10267 // Spill to obtain 24-bit precision 
10268 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10269 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10270 match(Set dst (AddF src1 (LoadF src2))); 10271 10272 format %{ "FLD $src2\n\t" 10273 "FADD ST,$src1\n\t" 10274 "FSTP_S $dst" %} 10275 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10276 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10277 OpcReg_FPR(src1), 10278 Pop_Mem_FPR(dst) ); 10279 ins_pipe( fpu_mem_reg_mem ); 10280 %} 10281 // 10282 // Cisc-alternate to addFPR_reg 10283 // This instruction does not round to 24-bits 10284 instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10285 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10286 match(Set dst (AddF dst (LoadF src))); 10287 10288 format %{ "FADD $dst,$src" %} 10289 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10290 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10291 OpcP, RegOpc(dst) ); 10292 ins_pipe( fpu_reg_mem ); 10293 %} 10294 10295 // // Following two instructions for _222_mpegaudio 10296 // Spill to obtain 24-bit precision 10297 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10298 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10299 match(Set dst (AddF src1 src2)); 10300 10301 format %{ "FADD $dst,$src1,$src2" %} 10302 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10303 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10304 OpcReg_FPR(src2), 10305 Pop_Mem_FPR(dst) ); 10306 ins_pipe( fpu_mem_reg_mem ); 10307 %} 10308 10309 // Cisc-spill variant 10310 // Spill to obtain 24-bit precision 10311 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10312 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10313 match(Set dst (AddF src1 (LoadF src2))); 10314 10315 format %{ "FADD $dst,$src1,$src2 cisc" %} 10316 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10317 ins_encode( Opcode(tertiary), 
RMopc_Mem(0x00,src2), 10318 set_instruction_start, 10319 OpcP, RMopc_Mem(secondary,src1), 10320 Pop_Mem_FPR(dst) ); 10321 ins_pipe( fpu_mem_mem_mem ); 10322 %} 10323 10324 // Spill to obtain 24-bit precision 10325 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10326 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10327 match(Set dst (AddF src1 src2)); 10328 10329 format %{ "FADD $dst,$src1,$src2" %} 10330 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10331 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10332 set_instruction_start, 10333 OpcP, RMopc_Mem(secondary,src1), 10334 Pop_Mem_FPR(dst) ); 10335 ins_pipe( fpu_mem_mem_mem ); 10336 %} 10337 10338 10339 // Spill to obtain 24-bit precision 10340 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10341 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10342 match(Set dst (AddF src con)); 10343 format %{ "FLD $src\n\t" 10344 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10345 "FSTP_S $dst" %} 10346 ins_encode %{ 10347 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10348 __ fadd_s($constantaddress($con)); 10349 __ fstp_s(Address(rsp, $dst$$disp)); 10350 %} 10351 ins_pipe(fpu_mem_reg_con); 10352 %} 10353 // 10354 // This instruction does not round to 24-bits 10355 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10356 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10357 match(Set dst (AddF src con)); 10358 format %{ "FLD $src\n\t" 10359 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10360 "FSTP $dst" %} 10361 ins_encode %{ 10362 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10363 __ fadd_s($constantaddress($con)); 10364 __ fstp_d($dst$$reg); 10365 %} 10366 ins_pipe(fpu_reg_reg_con); 10367 %} 10368 10369 // Spill to obtain 24-bit precision 10370 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10371 predicate(UseSSE==0 && 
Compile::current()->select_24_bit_instr()); 10372 match(Set dst (MulF src1 src2)); 10373 10374 format %{ "FLD $src1\n\t" 10375 "FMUL $src2\n\t" 10376 "FSTP_S $dst" %} 10377 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10378 ins_encode( Push_Reg_FPR(src1), 10379 OpcReg_FPR(src2), 10380 Pop_Mem_FPR(dst) ); 10381 ins_pipe( fpu_mem_reg_reg ); 10382 %} 10383 // 10384 // This instruction does not round to 24-bits 10385 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10386 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10387 match(Set dst (MulF src1 src2)); 10388 10389 format %{ "FLD $src1\n\t" 10390 "FMUL $src2\n\t" 10391 "FSTP_S $dst" %} 10392 opcode(0xD8, 0x1); /* D8 C8+i */ 10393 ins_encode( Push_Reg_FPR(src2), 10394 OpcReg_FPR(src1), 10395 Pop_Reg_FPR(dst) ); 10396 ins_pipe( fpu_reg_reg_reg ); 10397 %} 10398 10399 10400 // Spill to obtain 24-bit precision 10401 // Cisc-alternate to reg-reg multiply 10402 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10403 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10404 match(Set dst (MulF src1 (LoadF src2))); 10405 10406 format %{ "FLD_S $src2\n\t" 10407 "FMUL $src1\n\t" 10408 "FSTP_S $dst" %} 10409 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10410 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10411 OpcReg_FPR(src1), 10412 Pop_Mem_FPR(dst) ); 10413 ins_pipe( fpu_mem_reg_mem ); 10414 %} 10415 // 10416 // This instruction does not round to 24-bits 10417 // Cisc-alternate to reg-reg multiply 10418 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10419 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10420 match(Set dst (MulF src1 (LoadF src2))); 10421 10422 format %{ "FMUL $dst,$src1,$src2" %} 10423 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10424 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10425 OpcReg_FPR(src1), 10426 Pop_Reg_FPR(dst) ); 10427 ins_pipe( 
fpu_reg_reg_mem ); 10428 %} 10429 10430 // Spill to obtain 24-bit precision 10431 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10432 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10433 match(Set dst (MulF src1 src2)); 10434 10435 format %{ "FMUL $dst,$src1,$src2" %} 10436 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10437 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10438 set_instruction_start, 10439 OpcP, RMopc_Mem(secondary,src1), 10440 Pop_Mem_FPR(dst) ); 10441 ins_pipe( fpu_mem_mem_mem ); 10442 %} 10443 10444 // Spill to obtain 24-bit precision 10445 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10446 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10447 match(Set dst (MulF src con)); 10448 10449 format %{ "FLD $src\n\t" 10450 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10451 "FSTP_S $dst" %} 10452 ins_encode %{ 10453 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10454 __ fmul_s($constantaddress($con)); 10455 __ fstp_s(Address(rsp, $dst$$disp)); 10456 %} 10457 ins_pipe(fpu_mem_reg_con); 10458 %} 10459 // 10460 // This instruction does not round to 24-bits 10461 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10462 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10463 match(Set dst (MulF src con)); 10464 10465 format %{ "FLD $src\n\t" 10466 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10467 "FSTP $dst" %} 10468 ins_encode %{ 10469 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10470 __ fmul_s($constantaddress($con)); 10471 __ fstp_d($dst$$reg); 10472 %} 10473 ins_pipe(fpu_reg_reg_con); 10474 %} 10475 10476 10477 // 10478 // MACRO1 -- subsume unshared load into mulFPR 10479 // This instruction does not round to 24-bits 10480 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10481 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10482 match(Set dst 
(MulF (LoadF mem1) src)); 10483 10484 format %{ "FLD $mem1 ===MACRO1===\n\t" 10485 "FMUL ST,$src\n\t" 10486 "FSTP $dst" %} 10487 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10488 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10489 OpcReg_FPR(src), 10490 Pop_Reg_FPR(dst) ); 10491 ins_pipe( fpu_reg_reg_mem ); 10492 %} 10493 // 10494 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10495 // This instruction does not round to 24-bits 10496 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10497 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10498 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10499 ins_cost(95); 10500 10501 format %{ "FLD $mem1 ===MACRO2===\n\t" 10502 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10503 "FADD ST,$src2\n\t" 10504 "FSTP $dst" %} 10505 opcode(0xD9); /* LoadF D9 /0 */ 10506 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10507 FMul_ST_reg(src1), 10508 FAdd_ST_reg(src2), 10509 Pop_Reg_FPR(dst) ); 10510 ins_pipe( fpu_reg_mem_reg_reg ); 10511 %} 10512 10513 // MACRO3 -- addFPR a mulFPR 10514 // This instruction does not round to 24-bits. It is a '2-address' 10515 // instruction in that the result goes back to src2. This eliminates 10516 // a move from the macro; possibly the register allocator will have 10517 // to add it back (and maybe not). 
10518 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ 10519 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10520 match(Set src2 (AddF (MulF src0 src1) src2)); 10521 10522 format %{ "FLD $src0 ===MACRO3===\n\t" 10523 "FMUL ST,$src1\n\t" 10524 "FADDP $src2,ST" %} 10525 opcode(0xD9); /* LoadF D9 /0 */ 10526 ins_encode( Push_Reg_FPR(src0), 10527 FMul_ST_reg(src1), 10528 FAddP_reg_ST(src2) ); 10529 ins_pipe( fpu_reg_reg_reg ); 10530 %} 10531 10532 // MACRO4 -- divFPR subFPR 10533 // This instruction does not round to 24-bits 10534 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ 10535 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10536 match(Set dst (DivF (SubF src2 src1) src3)); 10537 10538 format %{ "FLD $src2 ===MACRO4===\n\t" 10539 "FSUB ST,$src1\n\t" 10540 "FDIV ST,$src3\n\t" 10541 "FSTP $dst" %} 10542 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10543 ins_encode( Push_Reg_FPR(src2), 10544 subFPR_divFPR_encode(src1,src3), 10545 Pop_Reg_FPR(dst) ); 10546 ins_pipe( fpu_reg_reg_reg_reg ); 10547 %} 10548 10549 // Spill to obtain 24-bit precision 10550 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10551 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10552 match(Set dst (DivF src1 src2)); 10553 10554 format %{ "FDIV $dst,$src1,$src2" %} 10555 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 10556 ins_encode( Push_Reg_FPR(src1), 10557 OpcReg_FPR(src2), 10558 Pop_Mem_FPR(dst) ); 10559 ins_pipe( fpu_mem_reg_reg ); 10560 %} 10561 // 10562 // This instruction does not round to 24-bits 10563 instruct divFPR_reg(regFPR dst, regFPR src) %{ 10564 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10565 match(Set dst (DivF dst src)); 10566 10567 format %{ "FDIV $dst,$src" %} 10568 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10569 ins_encode( Push_Reg_FPR(src), 10570 OpcP, RegOpc(dst) ); 10571 ins_pipe( fpu_reg_reg ); 10572 %} 10573 10574 10575 
// Spill to obtain 24-bit precision 10576 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10577 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10578 match(Set dst (ModF src1 src2)); 10579 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10580 10581 format %{ "FMOD $dst,$src1,$src2" %} 10582 ins_encode( Push_Reg_Mod_DPR(src1, src2), 10583 emitModDPR(), 10584 Push_Result_Mod_DPR(src2), 10585 Pop_Mem_FPR(dst)); 10586 ins_pipe( pipe_slow ); 10587 %} 10588 // 10589 // This instruction does not round to 24-bits 10590 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ 10591 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10592 match(Set dst (ModF dst src)); 10593 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10594 10595 format %{ "FMOD $dst,$src" %} 10596 ins_encode(Push_Reg_Mod_DPR(dst, src), 10597 emitModDPR(), 10598 Push_Result_Mod_DPR(src), 10599 Pop_Reg_FPR(dst)); 10600 ins_pipe( pipe_slow ); 10601 %} 10602 10603 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ 10604 predicate(UseSSE>=1); 10605 match(Set dst (ModF src0 src1)); 10606 effect(KILL rax, KILL cr); 10607 format %{ "SUB ESP,4\t # FMOD\n" 10608 "\tMOVSS [ESP+0],$src1\n" 10609 "\tFLD_S [ESP+0]\n" 10610 "\tMOVSS [ESP+0],$src0\n" 10611 "\tFLD_S [ESP+0]\n" 10612 "loop:\tFPREM\n" 10613 "\tFWAIT\n" 10614 "\tFNSTSW AX\n" 10615 "\tSAHF\n" 10616 "\tJP loop\n" 10617 "\tFSTP_S [ESP+0]\n" 10618 "\tMOVSS $dst,[ESP+0]\n" 10619 "\tADD ESP,4\n" 10620 "\tFSTP ST0\t # Restore FPU Stack" 10621 %} 10622 ins_cost(250); 10623 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); 10624 ins_pipe( pipe_slow ); 10625 %} 10626 10627 10628 //----------Arithmetic Conversion Instructions--------------------------------- 10629 // The conversions operations are all Alpha sorted. Please keep it that way! 
10630 10631 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ 10632 predicate(UseSSE==0); 10633 match(Set dst (RoundFloat src)); 10634 ins_cost(125); 10635 format %{ "FST_S $dst,$src\t# F-round" %} 10636 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 10637 ins_pipe( fpu_mem_reg ); 10638 %} 10639 10640 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ 10641 predicate(UseSSE<=1); 10642 match(Set dst (RoundDouble src)); 10643 ins_cost(125); 10644 format %{ "FST_D $dst,$src\t# D-round" %} 10645 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 10646 ins_pipe( fpu_mem_reg ); 10647 %} 10648 10649 // Force rounding to 24-bit precision and 6-bit exponent 10650 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ 10651 predicate(UseSSE==0); 10652 match(Set dst (ConvD2F src)); 10653 format %{ "FST_S $dst,$src\t# F-round" %} 10654 expand %{ 10655 roundFloat_mem_reg(dst,src); 10656 %} 10657 %} 10658 10659 // Force rounding to 24-bit precision and 6-bit exponent 10660 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ 10661 predicate(UseSSE==1); 10662 match(Set dst (ConvD2F src)); 10663 effect( KILL cr ); 10664 format %{ "SUB ESP,4\n\t" 10665 "FST_S [ESP],$src\t# F-round\n\t" 10666 "MOVSS $dst,[ESP]\n\t" 10667 "ADD ESP,4" %} 10668 ins_encode %{ 10669 __ subptr(rsp, 4); 10670 if ($src$$reg != FPR1L_enc) { 10671 __ fld_s($src$$reg-1); 10672 __ fstp_s(Address(rsp, 0)); 10673 } else { 10674 __ fst_s(Address(rsp, 0)); 10675 } 10676 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 10677 __ addptr(rsp, 4); 10678 %} 10679 ins_pipe( pipe_slow ); 10680 %} 10681 10682 // Force rounding double precision to single precision 10683 instruct convD2F_reg(regF dst, regD src) %{ 10684 predicate(UseSSE>=2); 10685 match(Set dst (ConvD2F src)); 10686 format %{ "CVTSD2SS $dst,$src\t# F-round" %} 10687 ins_encode %{ 10688 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); 10689 %} 10690 ins_pipe( pipe_slow ); 10691 %} 10692 10693 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ 
10694 predicate(UseSSE==0); 10695 match(Set dst (ConvF2D src)); 10696 format %{ "FST_S $dst,$src\t# D-round" %} 10697 ins_encode( Pop_Reg_Reg_DPR(dst, src)); 10698 ins_pipe( fpu_reg_reg ); 10699 %} 10700 10701 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ 10702 predicate(UseSSE==1); 10703 match(Set dst (ConvF2D src)); 10704 format %{ "FST_D $dst,$src\t# D-round" %} 10705 expand %{ 10706 roundDouble_mem_reg(dst,src); 10707 %} 10708 %} 10709 10710 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ 10711 predicate(UseSSE==1); 10712 match(Set dst (ConvF2D src)); 10713 effect( KILL cr ); 10714 format %{ "SUB ESP,4\n\t" 10715 "MOVSS [ESP] $src\n\t" 10716 "FLD_S [ESP]\n\t" 10717 "ADD ESP,4\n\t" 10718 "FSTP $dst\t# D-round" %} 10719 ins_encode %{ 10720 __ subptr(rsp, 4); 10721 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10722 __ fld_s(Address(rsp, 0)); 10723 __ addptr(rsp, 4); 10724 __ fstp_d($dst$$reg); 10725 %} 10726 ins_pipe( pipe_slow ); 10727 %} 10728 10729 instruct convF2D_reg(regD dst, regF src) %{ 10730 predicate(UseSSE>=2); 10731 match(Set dst (ConvF2D src)); 10732 format %{ "CVTSS2SD $dst,$src\t# D-round" %} 10733 ins_encode %{ 10734 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); 10735 %} 10736 ins_pipe( pipe_slow ); 10737 %} 10738 10739 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
10740 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ 10741 predicate(UseSSE<=1); 10742 match(Set dst (ConvD2I src)); 10743 effect( KILL tmp, KILL cr ); 10744 format %{ "FLD $src\t# Convert double to int \n\t" 10745 "FLDCW trunc mode\n\t" 10746 "SUB ESP,4\n\t" 10747 "FISTp [ESP + #0]\n\t" 10748 "FLDCW std/24-bit mode\n\t" 10749 "POP EAX\n\t" 10750 "CMP EAX,0x80000000\n\t" 10751 "JNE,s fast\n\t" 10752 "FLD_D $src\n\t" 10753 "CALL d2i_wrapper\n" 10754 "fast:" %} 10755 ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); 10756 ins_pipe( pipe_slow ); 10757 %} 10758 10759 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10760 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 10761 predicate(UseSSE>=2); 10762 match(Set dst (ConvD2I src)); 10763 effect( KILL tmp, KILL cr ); 10764 format %{ "CVTTSD2SI $dst, $src\n\t" 10765 "CMP $dst,0x80000000\n\t" 10766 "JNE,s fast\n\t" 10767 "SUB ESP, 8\n\t" 10768 "MOVSD [ESP], $src\n\t" 10769 "FLD_D [ESP]\n\t" 10770 "ADD ESP, 8\n\t" 10771 "CALL d2i_wrapper\n" 10772 "fast:" %} 10773 ins_encode %{ 10774 Label fast; 10775 __ cvttsd2sil($dst$$Register, $src$$XMMRegister); 10776 __ cmpl($dst$$Register, 0x80000000); 10777 __ jccb(Assembler::notEqual, fast); 10778 __ subptr(rsp, 8); 10779 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10780 __ fld_d(Address(rsp, 0)); 10781 __ addptr(rsp, 8); 10782 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); 10783 __ bind(fast); 10784 %} 10785 ins_pipe( pipe_slow ); 10786 %} 10787 10788 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ 10789 predicate(UseSSE<=1); 10790 match(Set dst (ConvD2L src)); 10791 effect( KILL cr ); 10792 format %{ "FLD $src\t# Convert double to long\n\t" 10793 "FLDCW trunc mode\n\t" 10794 "SUB ESP,8\n\t" 10795 "FISTp [ESP + #0]\n\t" 10796 "FLDCW std/24-bit mode\n\t" 10797 "POP EAX\n\t" 10798 "POP EDX\n\t" 10799 "CMP 
EDX,0x80000000\n\t" 10800 "JNE,s fast\n\t" 10801 "TEST EAX,EAX\n\t" 10802 "JNE,s fast\n\t" 10803 "FLD $src\n\t" 10804 "CALL d2l_wrapper\n" 10805 "fast:" %} 10806 ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); 10807 ins_pipe( pipe_slow ); 10808 %} 10809 10810 // XMM lacks a float/double->long conversion, so use the old FPU stack. 10811 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 10812 predicate (UseSSE>=2); 10813 match(Set dst (ConvD2L src)); 10814 effect( KILL cr ); 10815 format %{ "SUB ESP,8\t# Convert double to long\n\t" 10816 "MOVSD [ESP],$src\n\t" 10817 "FLD_D [ESP]\n\t" 10818 "FLDCW trunc mode\n\t" 10819 "FISTp [ESP + #0]\n\t" 10820 "FLDCW std/24-bit mode\n\t" 10821 "POP EAX\n\t" 10822 "POP EDX\n\t" 10823 "CMP EDX,0x80000000\n\t" 10824 "JNE,s fast\n\t" 10825 "TEST EAX,EAX\n\t" 10826 "JNE,s fast\n\t" 10827 "SUB ESP,8\n\t" 10828 "MOVSD [ESP],$src\n\t" 10829 "FLD_D [ESP]\n\t" 10830 "ADD ESP,8\n\t" 10831 "CALL d2l_wrapper\n" 10832 "fast:" %} 10833 ins_encode %{ 10834 Label fast; 10835 __ subptr(rsp, 8); 10836 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10837 __ fld_d(Address(rsp, 0)); 10838 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); 10839 __ fistp_d(Address(rsp, 0)); 10840 // Restore the rounding mode, mask the exception 10841 if (Compile::current()->in_24_bit_fp_mode()) { 10842 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 10843 } else { 10844 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 10845 } 10846 // Load the converted long, adjust CPU stack 10847 __ pop(rax); 10848 __ pop(rdx); 10849 __ cmpl(rdx, 0x80000000); 10850 __ jccb(Assembler::notEqual, fast); 10851 __ testl(rax, rax); 10852 __ jccb(Assembler::notEqual, fast); 10853 __ subptr(rsp, 8); 10854 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10855 __ fld_d(Address(rsp, 0)); 10856 __ addptr(rsp, 8); 10857 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); 10858 __ bind(fast); 
10859 %} 10860 ins_pipe( pipe_slow ); 10861 %} 10862 10863 // Convert a double to an int. Java semantics require we do complex 10864 // manglations in the corner cases. So we set the rounding mode to 10865 // 'zero', store the darned double down as an int, and reset the 10866 // rounding mode to 'nearest'. The hardware stores a flag value down 10867 // if we would overflow or converted a NAN; we check for this and 10868 // and go the slow path if needed. 10869 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ 10870 predicate(UseSSE==0); 10871 match(Set dst (ConvF2I src)); 10872 effect( KILL tmp, KILL cr ); 10873 format %{ "FLD $src\t# Convert float to int \n\t" 10874 "FLDCW trunc mode\n\t" 10875 "SUB ESP,4\n\t" 10876 "FISTp [ESP + #0]\n\t" 10877 "FLDCW std/24-bit mode\n\t" 10878 "POP EAX\n\t" 10879 "CMP EAX,0x80000000\n\t" 10880 "JNE,s fast\n\t" 10881 "FLD $src\n\t" 10882 "CALL d2i_wrapper\n" 10883 "fast:" %} 10884 // DPR2I_encoding works for FPR2I 10885 ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); 10886 ins_pipe( pipe_slow ); 10887 %} 10888 10889 // Convert a float in xmm to an int reg. 
10890 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 10891 predicate(UseSSE>=1); 10892 match(Set dst (ConvF2I src)); 10893 effect( KILL tmp, KILL cr ); 10894 format %{ "CVTTSS2SI $dst, $src\n\t" 10895 "CMP $dst,0x80000000\n\t" 10896 "JNE,s fast\n\t" 10897 "SUB ESP, 4\n\t" 10898 "MOVSS [ESP], $src\n\t" 10899 "FLD [ESP]\n\t" 10900 "ADD ESP, 4\n\t" 10901 "CALL d2i_wrapper\n" 10902 "fast:" %} 10903 ins_encode %{ 10904 Label fast; 10905 __ cvttss2sil($dst$$Register, $src$$XMMRegister); 10906 __ cmpl($dst$$Register, 0x80000000); 10907 __ jccb(Assembler::notEqual, fast); 10908 __ subptr(rsp, 4); 10909 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10910 __ fld_s(Address(rsp, 0)); 10911 __ addptr(rsp, 4); 10912 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); 10913 __ bind(fast); 10914 %} 10915 ins_pipe( pipe_slow ); 10916 %} 10917 10918 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ 10919 predicate(UseSSE==0); 10920 match(Set dst (ConvF2L src)); 10921 effect( KILL cr ); 10922 format %{ "FLD $src\t# Convert float to long\n\t" 10923 "FLDCW trunc mode\n\t" 10924 "SUB ESP,8\n\t" 10925 "FISTp [ESP + #0]\n\t" 10926 "FLDCW std/24-bit mode\n\t" 10927 "POP EAX\n\t" 10928 "POP EDX\n\t" 10929 "CMP EDX,0x80000000\n\t" 10930 "JNE,s fast\n\t" 10931 "TEST EAX,EAX\n\t" 10932 "JNE,s fast\n\t" 10933 "FLD $src\n\t" 10934 "CALL d2l_wrapper\n" 10935 "fast:" %} 10936 // DPR2L_encoding works for FPR2L 10937 ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); 10938 ins_pipe( pipe_slow ); 10939 %} 10940 10941 // XMM lacks a float/double->long conversion, so use the old FPU stack. 
// SSE1+ float-to-long: spill the XMM value to the stack and convert with
// the x87 FISTP in truncating rounding mode; the special result
// 0x80000000:00000000 falls back to the d2l_wrapper stub.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // Move the XMM operand onto the x87 stack through memory.
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // Switch to truncating rounding for the Java (round-toward-zero) cast.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // EDX:EAX == 0x80000000:00000000 flags a special value; re-do the
    // conversion through the stub in that case.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Hand the operand to the stub on the x87 stack.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Int-to-double on the x87 stack (x87-only configurations).
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Int-to-double in XMM via CVTSI2SD (integer operand in a GP register).
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Int-to-double with the integer operand loaded directly from memory.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Int-to-double staying in the XMM domain (MOVD + CVTDQ2PD) when
// UseXmmI2D is requested.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Int-to-double on the x87 stack with the operand loaded from memory
// (only when not in 24-bit mode).
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// ConvI2F of a byte-masked int (predicate matches (AndI x 0xFF)): the
// value always fits exactly in a float, so no rounding variant is needed.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Int-to-float staying in the XMM domain (MOVD + CVTDQ2PS) when
// UseXmmI2F is requested.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend convert int to long: copy into both halves, then
// arithmetic-shift the high half to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long-to-double via the x87 stack (x87-only configurations); the long
// is pushed on the CPU stack and loaded with FILD.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Long-to-double for SSE2: convert on the x87 stack, then move the
// result into an XMM register through memory.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Long-to-float for SSE1+: convert on the x87 stack, then move the
// result into an XMM register through memory.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Long-to-float on the x87 stack, result rounded to a stack slot.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long-to-int: simply copy the low word of the long pair.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Bit-move: reinterpret float bits on the stack as an int register value.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Bit-move: store an x87 float's bits to an int stack slot (no SSE).
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Bit-move: store an XMM float's bits to an int stack slot (SSE1+).
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Bit-move: copy XMM float bits directly into a GP register (SSE2+).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Bit-move: store an int register's bits into a float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// Bit-move: load int bits from the stack into an x87 register (no SSE).
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Bit-move: load int bits from the stack into an XMM register (SSE1+).
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Bit-move: copy GP register bits directly into an XMM register (SSE2+).
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Bit-move: load a double's bits from the stack into a long register pair.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// Bit-move: store an x87 double's bits to a long stack slot (no SSE2).
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Bit-move: store an XMM double's bits to a long stack slot (SSE2+).
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Bit-move: XMM double to long register pair without touching memory;
// PSHUFLW swings the high 32 bits into position for the second MOVD.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Bit-move: store a long register pair into a double stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// Bit-move: load long bits from the stack into an x87 register (no SSE2).
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// Bit-move: load long bits into XMM with MOVSD, clearing the upper half.
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Same as above for targets where the partial (MOVLPD-style) load is
// preferred (!UseXmmLoadAndClearUpper).
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Bit-move: long register pair to XMM double without touching memory;
// the halves go in via MOVD and are merged with PUNPCKLDQ.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // false => not known-large; clear_mem picks small- or large-path itself
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// Variant for arrays the matcher already knows are large.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // true => known large
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, both operands byte[] (Latin-1).
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, both operands char[] (UTF-16).
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, Latin-1 vs UTF-16 operands.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, UTF-16 vs Latin-1: note the register classes and the
// call operands are swapped relative to the LU variant.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // Arguments are passed swapped (str2/cnt2 first) for the UL encoding.
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}

  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// Fast search of a constant-size Latin-1 substring in a UTF-16 string.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf with variable substring size, byte[]/byte[] (the -1 constant
// tells the stub the size is not known at compile time).
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf with variable substring size, char[]/char[].
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf with variable substring size, UTF-16 string / Latin-1 substring.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf of a single char in a char[] (SSE4.2).
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // first arg true => compare as arrays (length header checked)
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// Fast array equals for char[] (element-size flag set to char).
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// Test whether a byte[] contains any negative bytes.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Signed compare of a register with an immediate.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST reg,reg (shorter than CMP with 0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fold (AndI src con) compared to zero into a single TEST reg,imm.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fold (AndI src mem) compared to zero into a single TEST reg,mem.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
// Unsigned integer compare, register-register form.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Unsigned integer compare against an immediate.
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare against zero via TEST reg,reg.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Pointer compare against an immediate pointer constant.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  // Predicate restricts this form to loads with no relocation (raw pointers).
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
12008 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{ 12009 match(Set cr (CmpP (LoadP op) zero)); 12010 12011 format %{ "TEST $op,0xFFFFFFFF" %} 12012 ins_cost(500); 12013 opcode(0xF7); /* Opcode F7 /0 */ 12014 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); 12015 ins_pipe( ialu_cr_reg_imm ); 12016 %} 12017 12018 // Yanked all unsigned pointer compare operations. 12019 // Pointer compares are done with CmpP which is already unsigned. 12020 12021 //----------Max and Min-------------------------------------------------------- 12022 // Min Instructions 12023 //// 12024 // *** Min and Max using the conditional move are slower than the 12025 // *** branch version on a Pentium III. 12026 // // Conditional move for min 12027 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 12028 // effect( USE_DEF op2, USE op1, USE cr ); 12029 // format %{ "CMOVlt $op2,$op1\t! min" %} 12030 // opcode(0x4C,0x0F); 12031 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 12032 // ins_pipe( pipe_cmov_reg ); 12033 //%} 12034 // 12035 //// Min Register with Register (P6 version) 12036 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 12037 // predicate(VM_Version::supports_cmov() ); 12038 // match(Set op2 (MinI op1 op2)); 12039 // ins_cost(200); 12040 // expand %{ 12041 // eFlagsReg cr; 12042 // compI_eReg(cr,op1,op2); 12043 // cmovI_reg_lt(op2,op1,cr); 12044 // %} 12045 //%} 12046 12047 // Min Register with Register (generic version) 12048 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 12049 match(Set dst (MinI dst src)); 12050 effect(KILL flags); 12051 ins_cost(300); 12052 12053 format %{ "MIN $dst,$src" %} 12054 opcode(0xCC); 12055 ins_encode( min_enc(dst,src) ); 12056 ins_pipe( pipe_slow ); 12057 %} 12058 12059 // Max Register with Register 12060 // *** Min and Max using the conditional move are slower than the 12061 // *** branch version on a Pentium III. 
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  // 0xCC appears to be a placeholder; the real sequence comes from max_enc.
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 is computed but never referenced below — confirm
    // before removing.
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Unordered-or-<cond> branch: handles the parity bit produced by an
// unordered float compare by emitting an extra JP.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant used when only the flags result of the subtype check is needed
// (the CmpP-against-null is folded in); result register is clobbered.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short unordered-or-<cond> branch; mirrors jmpConUCF2 with 8-bit offsets.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
12569 // This is the test to avoid. 12570 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{ 12571 match(Set dst (CmpL3 src1 src2)); 12572 effect( KILL flags ); 12573 ins_cost(1000); 12574 format %{ "XOR $dst,$dst\n\t" 12575 "CMP $src1.hi,$src2.hi\n\t" 12576 "JLT,s m_one\n\t" 12577 "JGT,s p_one\n\t" 12578 "CMP $src1.lo,$src2.lo\n\t" 12579 "JB,s m_one\n\t" 12580 "JEQ,s done\n" 12581 "p_one:\tINC $dst\n\t" 12582 "JMP,s done\n" 12583 "m_one:\tDEC $dst\n" 12584 "done:" %} 12585 ins_encode %{ 12586 Label p_one, m_one, done; 12587 __ xorptr($dst$$Register, $dst$$Register); 12588 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register)); 12589 __ jccb(Assembler::less, m_one); 12590 __ jccb(Assembler::greater, p_one); 12591 __ cmpl($src1$$Register, $src2$$Register); 12592 __ jccb(Assembler::below, m_one); 12593 __ jccb(Assembler::equal, done); 12594 __ bind(p_one); 12595 __ incrementl($dst$$Register); 12596 __ jmpb(done); 12597 __ bind(m_one); 12598 __ decrementl($dst$$Register); 12599 __ bind(done); 12600 %} 12601 ins_pipe( pipe_slow ); 12602 %} 12603 12604 //====== 12605 // Manifest a CmpL result in the normal flags. Only good for LT or GE 12606 // compares. Can be used for LE or GT compares by reversing arguments. 12607 // NOT GOOD FOR EQ/NE tests. 12608 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ 12609 match( Set flags (CmpL src zero )); 12610 ins_cost(100); 12611 format %{ "TEST $src.hi,$src.hi" %} 12612 opcode(0x85); 12613 ins_encode( OpcP, RegReg_Hi2( src, src ) ); 12614 ins_pipe( ialu_cr_reg_reg ); 12615 %} 12616 12617 // Manifest a CmpL result in the normal flags. Only good for LT or GE 12618 // compares. Can be used for LE or GT compares by reversing arguments. 12619 // NOT GOOD FOR EQ/NE tests. 
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  // NOTE(review): '&&' binds tighter than '||', so the UseSSE guard applies
  // only to the BoolTest::lt arm in this predicate (same pattern in the three
  // instructs below) — confirm whether the ge arm should also be guarded.
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  // Low OR high is zero iff the whole long is zero.
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Conditionally move a long register pair (lo then hi halves) based on the
// flags left by a long eq/ne compare (flagsReg_long_EQNE).
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above with the source long coming from memory (LoadL folded in).
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above but with a memory source (LoadI folded into the CMOV).
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the BoolTest disjunction is parenthesized so the UseSSE guard applies
// to both tests ('&&' binds tighter than '||'); this matches the integer and
// pointer rules above, which were already parenthesized.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form, UseSSE >= 2).
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (FPU-stack form, UseSSE == 0).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM form, UseSSE >= 1).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// cmpOp_commute: the flag-setting rule above compares with swapped operands,
// so the condition used by the CMOV is the commuted one.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above with the source long coming from memory (LoadL folded in).
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above but with a memory source (LoadI folded into the CMOV).
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the BoolTest disjunction is parenthesized so the UseSSE guard applies
// to both tests ('&&' binds tighter than '||'); this matches the integer and
// pointer rules above, which were already parenthesized.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form, UseSSE >= 2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (FPU-stack form, UseSSE == 0).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


// Compare 2 longs and CMOVE floats (XMM form, UseSSE >= 1).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // EAX is pre-loaded with a placeholder oop (-1) before the call; the
  // encoding is otherwise a direct CALL rel32.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call with no FPU state to save/restore: just the direct call encoding.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // Pops the (dead) return address into EDX before the indirect jump.
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// RTM (restricted transactional memory) variant of fast-lock; needs two
// extra temps (cx1, cx2) and feeds RTM profiling counters into fast_lock().
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast-lock; RTM-only arguments are passed as noreg/NULL/false.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Live rule: a load that immediately re-reads a just-stored value is replaced
// by re-issuing the store (the loaded register already holds the value).
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.