//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
// (they occupy mask slots so that later-defined registers land on the
// boundary the allocator expects; they are never put in any alloc/reg class
// other than chunk0 below).
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI,   ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define   RELOC_IMM32    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Shorthand used throughout the embedded C++: "__" emits through the
// MacroAssembler instance named _masm in the enclosing scope.
#define __ _masm.

// How to find the high register of a Long pair, given the low register
// NOTE(review): assumes the paired high half always sits exactly two
// register-mask slots above the low half -- holds for the register layout
// defined in the register block of this file.
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Rounds 'adr' down to a 16-byte boundary, stores the two 64-bit halves of
// the mask there, and returns the aligned address.  Callers must therefore
// reserve one extra 128-bit slot of slack in the backing buffer.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
// The extra 128 bits of slack absorb the round-down done by double_quadword;
// always reference the masks through the aligned pointers below, never
// through fp_signmask_pool directly.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call to reset FPU/AVX state:
// 6 bytes for the fldcw when the method runs in 24-bit FP mode, plus 3 bytes
// for vzeroupper when the compiled code uses wide (>16 byte) vectors.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  // 10 = 5-byte MOV (inline-cache setup) + 5-byte call.
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

// Size in bytes of the code that empties the x87 FP stack before a runtime
// call; recorded when that code is first emitted, -1 until then (see the
// assert below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
// Returns the number of padding bytes to emit before this node so that the
// 4-byte displacement of the call lands on an aligned boundary.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
// Like the static variant above, but also accounts for the 5-byte MOV that
// loads the inline-cache oop before the call.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M (or SIB) byte: f1 = mod (or scale), f2 = reg (or index),
// f3 = r/m (or base).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition code OR'd into it (e.g. Jcc/SETcc).
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
// 'offset' adjusts the relocation address relative to the instruction mark.
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Debug-only sanity check: a 32-bit immediate carrying an oop relocation
  // must be a real, non-scavengable oop (or 0 / the non-oop sentinel).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + [ESP+disp] addressing, choosing an 8-bit or 32-bit
// displacement form.  ESP as base always requires a SIB byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModR/M byte (plus optional SIB byte and displacement) for a
// register-memory operand.  Conventions: index == 0x4 means "no index",
// base == -1 flags an absolute 32-bit address, and a non-none disp_reloc
// forces the 32-bit displacement form so the fixed-up value fits.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Emit a reg-to-reg MOV (0x8B /r); a move to self emits nothing.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Rewrites EFLAGS after a comiss/ucomiss so that an unordered (NaN) compare
// reads as 'less than'.  No-op (short-circuit jccb) when PF is clear.
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst':
// -1 for 'less than' or unordered (NaN), 0 for equal, 1 for greater.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the method prolog for -XX:+PrintOptoAssembly; mirrors the
// code layout produced by MachPrologNode::emit below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH   EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB    ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB    ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD    EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW  \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


// Emit the method prolog; the actual frame-building code is generated by
// MacroAssembler::verified_entry.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the method epilog for -XX:+PrintOptoAssembly; mirrors the
// code layout produced by MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW  standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL   EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST   PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

// Emit the method epilog: restore FPU/AVX state, pop the frame and EBP,
// check the reserved stack zone if needed, and emit the safepoint poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // TEST EAX, [polling_page] -- a read of the polling page; the VM arms
    // safepoints by protecting the page so this load traps.
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  // NOTE(review): 64 is a conservative pad covering the optional
  // reserved_stack_check code; the actual emitted size may be smaller.
  size += 64; // added to support ReservedStackAccess
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Register-class kinds used by the spill-copy machinery below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
// Classify an allocator register: bad, stack slot, GPR, x87 FP (pre-SSE2
// only), or XMM.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Spill helper for GPR/x87 <-> stack-slot moves.  With a CodeBuffer it emits
// the instruction; with do_size false it prints the assembly; in both cases
// it returns 'size' plus this instruction's encoded length.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else {                      // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Spill an XMM register to or from a stack slot.  A register pair
// (reg_lo+1 == reg_hi) means a 64-bit double (MOVSD), otherwise a 32-bit
// float (MOVSS).  Returns the accumulated size in bytes.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // With AVX-512 the displacement may be compressed (disp8*N), so ask the
  // assembler whether a one-byte displacement suffices for this offset.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy.  A register pair means a 64-bit double.
// Returns the accumulated size in bytes.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Copy a 32-bit general-purpose register into an XMM register (MOVD).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  // EVEX-encoded MOVD needs two extra prefix bytes.
  return (UseAVX> 2) ? 6 : 4;
}


// Copy an XMM register into a 32-bit general-purpose register (MOVD).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  // EVEX-encoded MOVD needs two extra prefix bytes.
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register copy (MOV r32,r32): opcode + ModRM = 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 float register to a stack slot.  If the source is not
// already on top of the FP stack, it is first FLD'ed (2 extra bytes) and
// then stored with a popping FSTP; otherwise a non-popping FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // The "register" passed to impl_helper selects the ModRM reg field, which
  // for x87 stores is the opcode extension: EBX_num (3) => FSTP, EDX_num (2) => FST.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);

// Copy a vector value between two stack slots.  32/64-bit vectors use
// PUSH/POP pairs; 128-bit and wider vectors borrow xmm0 as a scratch
// register, saving and restoring it in memory below ESP.
static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
                                     int dst_offset, uint ireg, outputStream* st) {
  // Pre-compute the expected size; the emit path asserts it matches.
  int calc_size = 0;
  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
  switch (ireg) {
  case Op_VecS: // 32-bit: one PUSH/POP pair
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    break;
  case Op_VecD: { // 64-bit: two PUSH/POP pairs, the second at offset+4
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    int tmp_src_offset = src_offset + 4;
    int tmp_dst_offset = dst_offset + 4;
    src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
    dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
    calc_size += 3+src_offset_size + 3+dst_offset_size;
    break;
  }
  case Op_VecX:
  case Op_VecY:
  case Op_VecZ:
    // save xmm0 (6) + restore xmm0 (6) + load (5+disp) + store (5+disp)
    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
    break;
  default:
    ShouldNotReachHere();
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX:
      // xmm0 is used as scratch; its old value is parked below ESP.
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
    assert(size == calc_size, "incorrect size calculation");
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  return calc_size;
}

// Shared worker behind MachSpillCopyNode::format/emit/size.  With 'cbuf'
// set it emits the spill copy; with do_size it only computes the byte
// size; otherwise it prints the assembly text.  Returns the size in bytes
// of the code generated (or that would be generated).
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  // Vector copies are dispatched to the dedicated vector helpers.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high half first so the low-half POP does not clobber it.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP is 2+2 bytes; the single FST form is 2 bytes.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // FLD (3 + displacement) followed by FSTP (2 bytes).
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // x87 store to the temp slot at [ESP+0] ...
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Materialize the address of the lock's stack slot: LEA reg,[ESP+offset].
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// Must agree byte-for-byte with the two LEA forms emitted above:
// disp32 form is 7 bytes, disp8 form is 4 bytes.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: inline-cache check.  Compares the expected klass
// (in EAX) against the receiver's klass and jumps to the IC-miss stub on
// mismatch.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Size must match emit() above exactly (checked by the assert there);
// OptoBreakpoint saves one NOP byte for the int3.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}

// This is UltraSparc specific, true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
// this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 1; }

// No CMOVF/CMOVD with SSE/SSE2
const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Compressed-oop/klass hooks: must never be reached on this platform
// (guarded by ShouldNotCallThis).
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_oop_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::const_klass_prefer_decode() {
  ShouldNotCallThis();
  return true;
}

// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Replace the memory operand of a faulting (implicit-null-check) node with
// a "win95-safe" variant of the same addressing mode.  'idx' is the input
// edge that faults; the loop below locates which operand owns that edge.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();       // Virtual call for number of operands
  uint skipped = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1;                          // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                               // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x86-32 they are stored with conversion only when the x87 FPU is used
// for floats (UseSSE == 0).
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// Recognizes (AndL x con) with a high-32-clear constant, and ConL itself.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
MEMORY_INTER causes an 1622 // operand to generate four functions which return the Base Register, the 1623 // Index Register, the Scale Value, and the Offset Value of the operand when 1624 // queried. COND_INTER causes an operand to generate six functions which 1625 // return the encoding code (ie - encoding bits for the instruction) 1626 // associated with each basic boolean condition for a conditional instruction. 1627 // Instructions specify two basic values for encoding. They use the 1628 // ins_encode keyword to specify their encoding class (which must be one of 1629 // the class names specified in the encoding block), and they use the 1630 // opcode keyword to specify, in order, their primary, secondary, and 1631 // tertiary opcode. Only the opcode sections which a particular instruction 1632 // needs for encoding need to be specified. 1633 encode %{ 1634 // Build emit functions for each basic byte or larger field in the intel 1635 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1636 // code in the enc_class source block. Emit functions will live in the 1637 // main source block for now. 
// In future, we can generalize this by
// adding a syntax that specifies the sizes of fields in an order,
// so that the adlc can build the emit functions automagically

// Emit primary opcode
enc_class OpcP %{
  emit_opcode(cbuf, $primary);
%}

// Emit secondary opcode
enc_class OpcS %{
  emit_opcode(cbuf, $secondary);
%}

// Emit opcode directly
enc_class Opcode(immI d8) %{
  emit_opcode(cbuf, $d8$$constant);
%}

// Emit the 0x66 operand-size override prefix
enc_class SizePrefix %{
  emit_opcode(cbuf,0x66);
%}

// Emit a mod/rm byte for a register-register form (mod = 0x3)
enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// Emit an ADLC-supplied opcode constant followed by a register-register mod/rm byte
enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
  emit_opcode(cbuf,$opcode$$constant);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// MOV r32,0 using the short register-in-opcode form
enc_class mov_r32_imm0( rRegI dst ) %{
  emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
  emit_d32   ( cbuf, 0x0 );              //              imm32==0x0
%}

enc_class cdq_enc %{
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  //
  //         normal case                          special case
  //
  // input : rax,: dividend                       min_int
  //         reg:  divisor                        -1
  //
  // output: rax,: quotient  (= rax, idiv reg)    min_int
  //         rdx:  remainder (= rax, irem reg)    0
  //
  //  Code sequence:
  //
  //  81 F8 00 00 00 80    cmp         rax,80000000h
  //  0F 85 0B 00 00 00    jne         normal_case
  //  33 D2                xor         rdx,edx
  //  83 F9 FF             cmp         rcx,0FFh
  //  0F 84 03 00 00 00    je          done
  //                  normal_case:
  //  99                   cdq
  //  F7 F9                idiv        rax,ecx
  //                  done:
  //
  emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
  emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
  emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
  emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
  emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
  emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
  emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
  emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
  emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
  emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
  emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
  // normal_case:
  emit_opcode(cbuf,0x99);                                         // cdq
  // idiv (note: must be emitted by the user of this rule)
  // normal:
%}

// Dense encoding for older common ops
enc_class Opc_plus(immI opcode, rRegI reg) %{
  emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
%}


// Opcode enc_class for 8/32 bit immediate instructions with sign-extension
enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
  // Check for 8-bit immediate, and set sign extend bit in opcode
  if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
    emit_opcode(cbuf, $primary | 0x02);
  }
  else {                          // If 32-bit immediate
    emit_opcode(cbuf, $primary);
  }
%}

enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
  // Emit primary opcode and set sign-extend bit
  // Check for 8-bit immediate, and set sign extend bit in opcode
  if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
    emit_opcode(cbuf, $primary | 0x02);
  }
  else {                          // If 32-bit immediate
    emit_opcode(cbuf, $primary);
  }
  // Emit r/m byte with secondary opcode, after primary opcode.
  emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
%}

enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
  // Check for 8-bit immediate, and set sign extend bit in opcode
  if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
    $$$emit8$imm$$constant;
  }
  else {                          // If 32-bit immediate
    // Output immediate
    $$$emit32$imm$$constant;
  }
%}

// Low-half of a long immediate op: primary opcode (with sign-extend bit for
// 8-bit range), mod/rm with $secondary, then the 8- or 32-bit immediate.
enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
  // Emit primary opcode and set sign-extend bit
  // Check for 8-bit immediate, and set sign extend bit in opcode
  int con = (int)$imm$$constant; // Throw away top bits
  emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
  // Emit r/m byte with secondary opcode, after primary opcode.
  emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
  else                               emit_d32(cbuf,con);
%}

// High-half counterpart: uses $tertiary and the high register of the pair.
enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
  // Emit primary opcode and set sign-extend bit
  // Check for 8-bit immediate, and set sign extend bit in opcode
  int con = (int)($imm$$constant >> 32); // Throw away bottom bits
  emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
  // Emit r/m byte with tertiary opcode, after primary opcode.
  emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
  if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
  else                               emit_d32(cbuf,con);
%}

enc_class OpcSReg (rRegI dst) %{    // BSWAP
  emit_cc(cbuf, $secondary, $dst$$reg );
%}

// BSWAP both halves of a long, then exchange them.
enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
  int destlo = $dst$$reg;
  int desthi = HIGH_FROM_LOW(destlo);
  // bswap lo
  emit_opcode(cbuf, 0x0F);
  emit_cc(cbuf, 0xC8, destlo);
  // bswap hi
  emit_opcode(cbuf, 0x0F);
  emit_cc(cbuf, 0xC8, desthi);
  // xchg lo and hi
  emit_opcode(cbuf, 0x87);
  emit_rm(cbuf, 0x3, destlo, desthi);
%}

enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
  emit_rm(cbuf, 0x3, $secondary, $div$$reg );
%}

enc_class enc_cmov(cmpOp cop ) %{ // CMOV
  $$$emit8$primary;
  emit_cc(cbuf, $secondary, $cop$$cmpcode);
%}

// FCMOV: opcode built from 0xDA00 plus condition code and FP register
enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
  int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
  emit_d8(cbuf, op >> 8 );
  emit_d8(cbuf, op & 255);
%}

// emulate a CMOV with a conditional branch around a MOV
enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
  // Invert sense of branch from sense of CMOV
  emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
  emit_d8( cbuf, $brOffs$$constant );
%}

// Slow-path subtype check; when $primary is set, EDI (result) is zeroed
// on the hit path before the miss label.
enc_class enc_PartialSubtypeCheck( ) %{
  Register Redi = as_Register(EDI_enc); // result register
  Register Reax = as_Register(EAX_enc); // super class
  Register Recx = as_Register(ECX_enc); // killed
  Register Resi = as_Register(ESI_enc); // sub class
  Label miss;

  MacroAssembler _masm(&cbuf);
  __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                   NULL, &miss,
                                   /*set_cond_codes:*/ true);
  if ($primary) {
    __ xorptr(Redi, Redi);
  }
  __ bind(miss);
%}

enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
  MacroAssembler masm(&cbuf);
  int start = masm.offset();
  if (UseSSE >= 2) {
    if (VerifyFPU) {
      masm.verify_FPU(0, "must be empty in SSE2+ mode");
    }
  } else {
    // External c_calling_convention expects the FPU stack to be 'clean'.
    // Compiled code leaves it dirty.  Do cleanup now.
    masm.empty_FPU_stack();
  }
  // Record (or verify) the fixed size of this encoding the first time through.
  if (sizeof_FFree_Float_Stack_All == -1) {
    sizeof_FFree_Float_Stack_All = masm.offset() - start;
  } else {
    assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
  }
%}

enc_class Verify_FPU_For_Leaf %{
  if( VerifyFPU ) {
    MacroAssembler masm(&cbuf);
    masm.verify_FPU( -3, "Returning from Runtime Leaf call");
  }
%}

enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
  // This is the instruction starting address for relocation info.
  cbuf.set_insts_mark();
  $$$emit8$primary;
  // CALL directly to the runtime
  emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                 runtime_call_Relocation::spec(), RELOC_IMM32 );

  if (UseSSE >= 2) {
    MacroAssembler _masm(&cbuf);
    BasicType rt = tf()->return_type();

    if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
      // A C runtime call where the return value is unused.  In SSE2+
      // mode the result needs to be removed from the FPU stack.  It's
      // likely that this function call could be removed by the
      // optimizer if the C function is a pure function.
      __ ffree(0);
    } else if (rt == T_FLOAT) {
      // Move the x87 float result to xmm0 through the stack.
      __ lea(rsp, Address(rsp, -4));
      __ fstp_s(Address(rsp, 0));
      __ movflt(xmm0, Address(rsp, 0));
      __ lea(rsp, Address(rsp, 4));
    } else if (rt == T_DOUBLE) {
      // Move the x87 double result to xmm0 through the stack.
      __ lea(rsp, Address(rsp, -8));
      __ fstp_d(Address(rsp, 0));
      __ movdbl(xmm0, Address(rsp, 0));
      __ lea(rsp, Address(rsp, 8));
    }
  }
%}


enc_class pre_call_resets %{
  // If method sets FPU control word restore it here
  debug_only(int off0 = cbuf.insts_size());
  if (ra_->C->in_24_bit_fp_mode()) {
    MacroAssembler _masm(&cbuf);
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }
  if (ra_->C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    MacroAssembler _masm(&cbuf);
    __ vzeroupper();
  }
  debug_only(int off1 = cbuf.insts_size());
  assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
%}

enc_class post_call_FPU %{
  // If method sets FPU control word do it here also
  if (Compile::current()->in_24_bit_fp_mode()) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
  }
%}

enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
  // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
  // who we intended to call.
  cbuf.set_insts_mark();
  $$$emit8$primary;

  if (!_method) {
    // Runtime stub target: plain runtime-call relocation.
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                   runtime_call_Relocation::spec(),
                   RELOC_IMM32);
  } else {
    int method_index = resolved_method_index(cbuf);
    RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                : static_call_Relocation::spec(method_index);
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                   rspec, RELOC_DISP32);
    // Emit stubs for static call.
    address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
    if (stub == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  }
%}

enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
  MacroAssembler _masm(&cbuf);
  __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
%}

enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
  int disp = in_bytes(Method::from_compiled_offset());
  assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

  // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
  cbuf.set_insts_mark();
  $$$emit8$primary;
  emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
  emit_d8(cbuf, disp);                        // Displacement

%}

// Following encoding is no longer used, but may be restored if calling
// convention changes significantly.
// Became: Xor_Reg(EBP), Java_To_Runtime( labl )
//
// enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
//   // int ic_reg     = Matcher::inline_cache_reg();
//   // int ic_encode  = Matcher::_regEncode[ic_reg];
//   // int imo_reg    = Matcher::interpreter_method_oop_reg();
//   // int imo_encode = Matcher::_regEncode[imo_reg];
//
//   // // Interpreter expects method_oop in EBX, currently a callee-saved register,
//   // // so we load it immediately before the call
//   // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
//   // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
//
//   // xor rbp,ebp
//   emit_opcode(cbuf, 0x33);
//   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
//
//   // CALL to interpreter.
//   cbuf.set_insts_mark();
//   $$$emit8$primary;
//   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
//                  runtime_call_Relocation::spec(), RELOC_IMM32 );
// %}

enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  $$$emit8$shift$$constant;
%}

enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  emit_opcode(cbuf, 0xB8 + $dst$$reg);
  $$$emit32$src$$constant;
%}

enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  emit_opcode(cbuf, $primary + $dst$$reg);
  $$$emit32$src$$constant;
%}

// Load the low 32 bits of a long immediate; zero constants use XOR instead.
enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  int dst_enc = $dst$$reg;
  int src_con = $src$$constant & 0x0FFFFFFFFL;
  if (src_con == 0) {
    // xor dst, dst
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, dst_enc, dst_enc);
  } else {
    emit_opcode(cbuf, $primary + dst_enc);
    emit_d32(cbuf, src_con);
  }
%}

// Load the high 32 bits of a long immediate; zero constants use XOR instead.
enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  int dst_enc = $dst$$reg + 2;
  int src_con = ((julong)($src$$constant)) >> 32;
  if (src_con == 0) {
    // xor dst, dst
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, dst_enc, dst_enc);
  } else {
    emit_opcode(cbuf, $primary + dst_enc);
    emit_d32(cbuf, src_con);
  }
%}


// Encode a reg-reg copy.  If it is useless, then empty encoding.
enc_class enc_Copy( rRegI dst, rRegI src ) %{
  encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}

enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
  encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}

enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
  $$$emit8$secondary;
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
%}

enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
%}

// mod/rm pairing the int dst with the high half of the long src
enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
%}

enc_class Con32 (immI src) %{    // Con32(storeImmI)
  // Output immediate
  $$$emit32$src$$constant;
%}

enc_class Con32FPR_as_bits(immFPR src) %{    // storeF_imm
  // Output Float immediate bits
  jfloat jf = $src$$constant;
  int    jf_as_bits = jint_cast( jf );
  emit_d32(cbuf, jf_as_bits);
%}

enc_class Con32F_as_bits(immF src) %{    // storeX_imm
  // Output Float immediate bits
  jfloat jf = $src$$constant;
  int    jf_as_bits = jint_cast( jf );
  emit_d32(cbuf, jf_as_bits);
%}

enc_class Con16 (immI src) %{    // Con16(storeImmI)
  // Output immediate
  $$$emit16$src$$constant;
%}

// Emit the immediate as a raw 32-bit value
enc_class Con_d32(immI src) %{
  emit_d32(cbuf,$src$$constant);
%}

enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
  // Output immediate memory reference
  emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
  emit_d32(cbuf, 0x00);
%}

// Emit the LOCK prefix only on multiprocessor systems
enc_class lock_prefix( ) %{
  if( os::is_MP() )
    emit_opcode(cbuf,0xF0);         // [Lock]
%}

// Cmp-xchg long value.
// Note: we need to swap rbx, and rcx before and after the
//       cmpxchg8 instruction because the instruction uses
//       rcx as the high order word of the new value to store but
//       our register encoding uses rbx,.
enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
  // [Lock]
  if( os::is_MP() )
    emit_opcode(cbuf,0xF0);
  // CMPXCHG8 [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xC7);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
%}

// Compare-and-exchange a 32-bit value at [mem_ptr], locked on MP systems.
enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
  // [Lock]
  if( os::is_MP() )
    emit_opcode(cbuf,0xF0);

  // CMPXCHG [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// Materialize ZF!=0 as a 0/1 boolean in res (branches over the MOV res,1).
enc_class enc_flags_ne_to_boolean( iRegI res ) %{
  int res_encoding = $res$$reg;

  // MOV  res,0
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 0 );
  // JNE,s  fail
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 5 );
  // MOV  res,1
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 1 );
  // fail:
%}

enc_class set_instruction_start( ) %{
  cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
%}

enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = $ereg$$reg;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc();
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp + 4;      // Offset is 4 further in memory
  assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
%}

// Long shift by a 1..31 immediate: SHLD/SHRD ($tertiary) across the pair,
// then the plain shift ($primary/$secondary) on the remaining half.
// $tertiary == 0xA4 (SHLD) selects left-shift operand ordering.
enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
  int r1, r2;
  if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,$tertiary);
  emit_rm(cbuf, 0x3, r1, r2);
  emit_d8(cbuf,$cnt$$constant);
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, r1);
  emit_d8(cbuf,$cnt$$constant);
%}

// Arithmetic right shift of a long by 32..63: move hi to lo, shift, and
// fill the hi half with the sign via SAR 31.
enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
  emit_opcode( cbuf, 0x8B ); // Move
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
  emit_d8(cbuf,31);
%}

// Logical long shift by 32..63: move one half to the other, shift, and
// clear the vacated half.  $secondary == 0x5 (SHR) selects the direction.
enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
  int r1, r2;
  if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

  emit_opcode( cbuf, 0x8B ); // Move r1,r2
  emit_rm(cbuf, 0x3, r1, r2);
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_opcode(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_opcode(cbuf,0x33);  // XOR r2,r2
  emit_rm(cbuf, 0x3, r2, r2);
%}

// Clone of RegMem but accepts an extra parameter to access each
// half of a double in memory; it never needs relocation info.
enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
  emit_opcode(cbuf,$opcode$$constant);
  int reg_encoding = $rm_reg$$reg;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp + $disp_for_half$$constant;
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
//
// Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
// and it never needs relocation information.
// Frequently used to move data between FPU's Stack Top and memory.
enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
%}

enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
  int reg_encoding = $dst$$reg;
  int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
  int index        = 0x04;            // 0x04 indicates no index
  int scale        = 0x00;            // 0x00 indicates no scale
  int displace     = $src1$$constant; // 0x00 indicates no displacement
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst < src around move
  emit_opcode(cbuf,0x7C);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst > src around move
  emit_opcode(cbuf,0x7F);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class enc_FPR_store(memory mem, regDPR src) %{
  // If src is FPR1, we can just FST to store it.
  // Else we need to FLD it to FPR1, then FSTP to store/pop it.
  int reg_encoding = 0x2; // Just store
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  if( $src$$reg != FPR1L_enc ) {
    reg_encoding = 0x3;  // Store & pop
    emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
  }
  cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
  emit_opcode(cbuf,$primary);
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class neg_reg(rRegI dst) %{
  // NEG $dst
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
%}

enc_class setLT_reg(eCXRegI dst) %{
  // SETLT $dst
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0x9C);
  emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
%}

// Branch-free conditional add: p += (p < q) ? y : 0, using SBB to build a mask.
enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
  int tmpReg = $tmp$$reg;

  // SUB $p,$q
  emit_opcode(cbuf,0x2B);
  emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
  // SBB $tmp,$tmp
  emit_opcode(cbuf,0x1B);
  emit_rm(cbuf, 0x3, tmpReg, tmpReg);
  // AND $tmp,$y
  emit_opcode(cbuf,0x23);
  emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
  // ADD $p,$tmp
  emit_opcode(cbuf,0x03);
  emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
%}

// Variable long left shift: handle shift >= 32 by moving lo into hi and
// clearing lo, then SHLD/SHL for the in-word part (count in ECX).
enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.hi,$dst.lo
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // CLR $dst.lo
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
  // SHLD $dst.hi,$dst.lo,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xA5);
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  // SHL $dst.lo,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
%}

// Variable long logical right shift: mirror image of shift_left_long.
enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // CLR $dst.hi
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SHR $dst.hi,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
%}

// Variable long arithmetic right shift: the hi half is sign-filled (SAR 31)
// instead of cleared when shift >= 32.
enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x05);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // SAR $dst.hi,31
  emit_opcode(cbuf, 0xC1);
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
  emit_d8(cbuf, 0x1F );
// small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SAR $dst.hi,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
%}


// ----------------- Encodings for floating point unit -----------------
// May leave result in FPU-TOS or FPU reg depending on opcodes
enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $src$$reg );
%}

// Pop argument in FPR0 with FSTP ST(0)
enc_class PopFPU() %{
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, 0xD8 );
%}

// !!!!!  equivalent to Pop_Reg_F
enc_class Pop_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xDD );         // FSTP ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class Push_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1)
%}

// Multiply dst by the strictfp scaling constant loaded as an 80-bit real.
enc_class strictfp_bias1( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );         // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
  emit_opcode( cbuf, 0xDE );         // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Reverse the bias applied by strictfp_bias1 using the second constant.
enc_class strictfp_bias2( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );         // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
  emit_opcode( cbuf, 0xDE );         // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Special case for moving an integer register to a stack slot.
enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
%}

// Special case for moving a register to a stack slot.
enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  // Opcode already emitted
  emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
  emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
  emit_d32(cbuf, $dst$$disp);                  // Displacement
%}

// Push the integer in stackSlot 'src' onto FP-stack
enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
  store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
%}

// Push FPU's TOS float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
  store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
%}

// Same as Pop_Mem_F except for opcode
// Push FPU's TOS double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
  store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
%}

enc_class Pop_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xDD );         // FSTP ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class Push_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xD9 );         // FLD ST(i-1)
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
%}

// Push FPU's float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
  int pop = 0x02;
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );       // FLD ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;
  }
  store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
%}

// Push FPU's double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
  int pop = 0x02;
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );       // FLD ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;
  }
  store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
%}

// Push FPU's double to
// a FPU-stack-slot, and pop FPU-stack
enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
  int pop = 0xD0 - 1; // -1 since we skip FLD
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );       // FLD    ST(src-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0xD8;
  }
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, pop+$dst$$reg );    // FST<P> ST(i)
%}


enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
  // load dst in FPR0
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // swap src with FPR1:
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
%}

// Push src1 then src0 from XMM registers onto the x87 stack via memory.
enc_class Push_ModD_encoding(regD src0, regD src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_d(Address(rsp, 0));
  __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Float variant of Push_ModD_encoding.
enc_class Push_ModF_encoding(regF src0, regF src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 4);
  __ movflt(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_s(Address(rsp, 0));
  __ movflt(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_s(Address(rsp, 0));
%}

// Pop the x87 TOS double into an XMM register via memory.
enc_class Push_ResultD(regD dst) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_d(Address(rsp, 0));
  __ movdbl($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, 8);
%}

// Pop the x87 TOS float into an XMM register; d8 is the stack adjustment.
enc_class Push_ResultF(regF dst, immI d8) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_s(Address(rsp, 0));
  __ movflt($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, $d8$$constant);
%}

// Push an XMM double onto the x87 stack via a fresh stack temp.
enc_class Push_SrcD(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

enc_class push_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
%}

enc_class pop_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ addptr(rsp, 8);
%}

// Spill an XMM double through [rsp] onto the x87 stack (temp already reserved).
enc_class push_xmm_to_fpr1(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

enc_class Push_Result_Mod_DPR( regDPR src) %{
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
  // // following asm replaced with Pop_Reg_F or Pop_Mem_F
  // // FSTP   FPR$dst$$reg
  // emit_opcode( cbuf, 0xDD );
  // emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class fnstsw_sahf_skip_parity() %{
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jnp  ::skip
  emit_opcode( cbuf, 0x7B );
  emit_opcode( cbuf, 0x05 );
%}

enc_class emitModDPR() %{
  // fprem must be iterative
  // :: loop
  // fprem
  emit_opcode( cbuf, 0xD9 );
  emit_opcode( cbuf, 0xF8 );
  // wait
  emit_opcode( cbuf, 0x9b );
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jp ::loop
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0x8A );
  emit_opcode( cbuf, 0xF4 );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
%}

enc_class fpu_flags() %{
  // fnstsw_ax
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // test ax,0x0400
  emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
  emit_opcode( cbuf, 0xA9 );
  emit_d16   ( cbuf, 0x0400 );
  // // // This sequence works, but stalls for 12-16 cycles on PPro
  // // test rax,0x0400
  // emit_opcode( cbuf, 0xA9 );
  // emit_d32   ( cbuf, 0x00000400 );
  //
  // jz exit (no unordered comparison)
  emit_opcode( cbuf, 0x74 );
  emit_d8    ( cbuf, 0x02 );
  // mov ah,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8    ( cbuf, 0x01 );
  // sahf
  emit_opcode( cbuf, 0x9E);
%}

enc_class cmpF_P6_fixup() %{
  // Fixup the integer flags in case comparison involved a NaN
  //
  // JNP exit (no unordered comparison, P-flag is set by NaN)
  emit_opcode( cbuf, 0x7B );
  emit_d8    ( cbuf, 0x03 );
  // MOV AH,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8    ( cbuf, 0x01 );
  // SAHF
  emit_opcode( cbuf, 0x9E);
  // NOP     // target for branch to avoid branch to branch
  emit_opcode( cbuf, 0x90);
%}

// fnstsw_ax();
// sahf();
// movl(dst, nan_result);
// jcc(Assembler::parity, exit);
// movl(dst, less_result);
// jcc(Assembler::below, exit);
// movl(dst, equal_result);
// jcc(Assembler::equal, exit);
// movl(dst, greater_result);

// less_result     =  1;
// greater_result  = -1;
// equal_result    = 0;
// nan_result      = -1;

enc_class CmpF_Result(rRegI dst) %{
  // fnstsw_ax();
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // sahf
  emit_opcode( cbuf, 0x9E);
  // movl(dst, nan_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::parity, exit);
  emit_opcode( cbuf, 0x7A );
  emit_d8    ( cbuf, 0x13 );
  // movl(dst, less_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::below, exit);
  emit_opcode( cbuf, 0x72 );
2728 emit_d8 ( cbuf, 0x0C ); 2729 // movl(dst, equal_result); 2730 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2731 emit_d32( cbuf, 0 ); 2732 // jcc(Assembler::equal, exit); 2733 emit_opcode( cbuf, 0x74 ); 2734 emit_d8 ( cbuf, 0x05 ); 2735 // movl(dst, greater_result); 2736 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2737 emit_d32( cbuf, 1 ); 2738 %} 2739 2740 2741 // Compare the longs and set flags 2742 // BROKEN! Do Not use as-is 2743 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2744 // CMP $src1.hi,$src2.hi 2745 emit_opcode( cbuf, 0x3B ); 2746 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2747 // JNE,s done 2748 emit_opcode(cbuf,0x75); 2749 emit_d8(cbuf, 2 ); 2750 // CMP $src1.lo,$src2.lo 2751 emit_opcode( cbuf, 0x3B ); 2752 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2753 // done: 2754 %} 2755 2756 enc_class convert_int_long( regL dst, rRegI src ) %{ 2757 // mov $dst.lo,$src 2758 int dst_encoding = $dst$$reg; 2759 int src_encoding = $src$$reg; 2760 encode_Copy( cbuf, dst_encoding , src_encoding ); 2761 // mov $dst.hi,$src 2762 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2763 // sar $dst.hi,31 2764 emit_opcode( cbuf, 0xC1 ); 2765 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2766 emit_d8(cbuf, 0x1F ); 2767 %} 2768 2769 enc_class convert_long_double( eRegL src ) %{ 2770 // push $src.hi 2771 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2772 // push $src.lo 2773 emit_opcode(cbuf, 0x50+$src$$reg ); 2774 // fild 64-bits at [SP] 2775 emit_opcode(cbuf,0xdf); 2776 emit_d8(cbuf, 0x6C); 2777 emit_d8(cbuf, 0x24); 2778 emit_d8(cbuf, 0x00); 2779 // pop stack 2780 emit_opcode(cbuf, 0x83); // add SP, #8 2781 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2782 emit_d8(cbuf, 0x8); 2783 %} 2784 2785 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2786 // IMUL EDX:EAX,$src1 2787 emit_opcode( cbuf, 0xF7 ); 2788 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2789 // SAR 
EDX,$cnt-32 2790 int shift_count = ((int)$cnt$$constant) - 32; 2791 if (shift_count > 0) { 2792 emit_opcode(cbuf, 0xC1); 2793 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2794 emit_d8(cbuf, shift_count); 2795 } 2796 %} 2797 2798 // this version doesn't have add sp, 8 2799 enc_class convert_long_double2( eRegL src ) %{ 2800 // push $src.hi 2801 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2802 // push $src.lo 2803 emit_opcode(cbuf, 0x50+$src$$reg ); 2804 // fild 64-bits at [SP] 2805 emit_opcode(cbuf,0xdf); 2806 emit_d8(cbuf, 0x6C); 2807 emit_d8(cbuf, 0x24); 2808 emit_d8(cbuf, 0x00); 2809 %} 2810 2811 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2812 // Basic idea: long = (long)int * (long)int 2813 // IMUL EDX:EAX, src 2814 emit_opcode( cbuf, 0xF7 ); 2815 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2816 %} 2817 2818 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2819 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2820 // MUL EDX:EAX, src 2821 emit_opcode( cbuf, 0xF7 ); 2822 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2823 %} 2824 2825 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2826 // Basic idea: lo(result) = lo(x_lo * y_lo) 2827 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2828 // MOV $tmp,$src.lo 2829 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2830 // IMUL $tmp,EDX 2831 emit_opcode( cbuf, 0x0F ); 2832 emit_opcode( cbuf, 0xAF ); 2833 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2834 // MOV EDX,$src.hi 2835 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2836 // IMUL EDX,EAX 2837 emit_opcode( cbuf, 0x0F ); 2838 emit_opcode( cbuf, 0xAF ); 2839 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2840 // ADD $tmp,EDX 2841 emit_opcode( cbuf, 0x03 ); 2842 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2843 // MUL EDX:EAX,$src.lo 2844 emit_opcode( cbuf, 0xF7 ); 2845 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2846 // ADD EDX,ESI 2847 emit_opcode( 
cbuf, 0x03 ); 2848 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2849 %} 2850 2851 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2852 // Basic idea: lo(result) = lo(src * y_lo) 2853 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2854 // IMUL $tmp,EDX,$src 2855 emit_opcode( cbuf, 0x6B ); 2856 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2857 emit_d8( cbuf, (int)$src$$constant ); 2858 // MOV EDX,$src 2859 emit_opcode(cbuf, 0xB8 + EDX_enc); 2860 emit_d32( cbuf, (int)$src$$constant ); 2861 // MUL EDX:EAX,EDX 2862 emit_opcode( cbuf, 0xF7 ); 2863 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2864 // ADD EDX,ESI 2865 emit_opcode( cbuf, 0x03 ); 2866 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2867 %} 2868 2869 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2870 // PUSH src1.hi 2871 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2872 // PUSH src1.lo 2873 emit_opcode(cbuf, 0x50+$src1$$reg ); 2874 // PUSH src2.hi 2875 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2876 // PUSH src2.lo 2877 emit_opcode(cbuf, 0x50+$src2$$reg ); 2878 // CALL directly to the runtime 2879 cbuf.set_insts_mark(); 2880 emit_opcode(cbuf,0xE8); // Call into runtime 2881 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2882 // Restore stack 2883 emit_opcode(cbuf, 0x83); // add SP, #framesize 2884 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2885 emit_d8(cbuf, 4*4); 2886 %} 2887 2888 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2889 // PUSH src1.hi 2890 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2891 // PUSH src1.lo 2892 emit_opcode(cbuf, 0x50+$src1$$reg ); 2893 // PUSH src2.hi 2894 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2895 // PUSH src2.lo 2896 emit_opcode(cbuf, 0x50+$src2$$reg ); 2897 // CALL directly to the runtime 2898 cbuf.set_insts_mark(); 2899 emit_opcode(cbuf,0xE8); // Call into runtime 2900 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2901 // Restore stack 2902 emit_opcode(cbuf, 0x83); // add SP, #framesize 2903 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2904 emit_d8(cbuf, 4*4); 2905 %} 2906 2907 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2908 // MOV $tmp,$src.lo 2909 emit_opcode(cbuf, 0x8B); 2910 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2911 // OR $tmp,$src.hi 2912 emit_opcode(cbuf, 0x0B); 2913 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2914 %} 2915 2916 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2917 // CMP $src1.lo,$src2.lo 2918 emit_opcode( cbuf, 0x3B ); 2919 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2920 // JNE,s skip 2921 emit_cc(cbuf, 0x70, 0x5); 2922 emit_d8(cbuf,2); 2923 // CMP $src1.hi,$src2.hi 2924 emit_opcode( cbuf, 0x3B ); 2925 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2926 %} 2927 2928 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2929 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2930 emit_opcode( cbuf, 0x3B ); 2931 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2932 // MOV $tmp,$src1.hi 2933 emit_opcode( cbuf, 0x8B ); 2934 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2935 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2936 emit_opcode( cbuf, 0x1B ); 2937 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2938 %} 2939 2940 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2941 // XOR $tmp,$tmp 2942 emit_opcode(cbuf,0x33); // XOR 2943 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2944 // CMP $tmp,$src.lo 2945 emit_opcode( cbuf, 0x3B ); 2946 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2947 // SBB $tmp,$src.hi 2948 emit_opcode( cbuf, 0x1B ); 2949 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2950 %} 2951 2952 // Sniff, sniff... 
smells like Gnu Superoptimizer 2953 enc_class neg_long( eRegL dst ) %{ 2954 emit_opcode(cbuf,0xF7); // NEG hi 2955 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2956 emit_opcode(cbuf,0xF7); // NEG lo 2957 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2958 emit_opcode(cbuf,0x83); // SBB hi,0 2959 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2960 emit_d8 (cbuf,0 ); 2961 %} 2962 2963 enc_class enc_pop_rdx() %{ 2964 emit_opcode(cbuf,0x5A); 2965 %} 2966 2967 enc_class enc_rethrow() %{ 2968 cbuf.set_insts_mark(); 2969 emit_opcode(cbuf, 0xE9); // jmp entry 2970 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2971 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2972 %} 2973 2974 2975 // Convert a double to an int. Java semantics require we do complex 2976 // manglelations in the corner cases. So we set the rounding mode to 2977 // 'zero', store the darned double down as an int, and reset the 2978 // rounding mode to 'nearest'. The hardware throws an exception which 2979 // patches up the correct value directly to the stack. 2980 enc_class DPR2I_encoding( regDPR src ) %{ 2981 // Flip to round-to-zero mode. We attempted to allow invalid-op 2982 // exceptions here, so that a NAN or other corner-case value will 2983 // thrown an exception (but normal values get converted at full speed). 2984 // However, I2C adapters and other float-stack manglers leave pending 2985 // invalid-op exceptions hanging. We would have to clear them before 2986 // enabling them and that is more expensive than just testing for the 2987 // invalid value Intel stores down in the corner cases. 2988 emit_opcode(cbuf,0xD9); // FLDCW trunc 2989 emit_opcode(cbuf,0x2D); 2990 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 2991 // Allocate a word 2992 emit_opcode(cbuf,0x83); // SUB ESP,4 2993 emit_opcode(cbuf,0xEC); 2994 emit_d8(cbuf,0x04); 2995 // Encoding assumes a double has been pushed into FPR0. 
2996 // Store down the double as an int, popping the FPU stack 2997 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2998 emit_opcode(cbuf,0x1C); 2999 emit_d8(cbuf,0x24); 3000 // Restore the rounding mode; mask the exception 3001 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3002 emit_opcode(cbuf,0x2D); 3003 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3004 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 3005 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3006 3007 // Load the converted int; adjust CPU stack 3008 emit_opcode(cbuf,0x58); // POP EAX 3009 emit_opcode(cbuf,0x3D); // CMP EAX,imm 3010 emit_d32 (cbuf,0x80000000); // 0x80000000 3011 emit_opcode(cbuf,0x75); // JNE around_slow_call 3012 emit_d8 (cbuf,0x07); // Size of slow_call 3013 // Push src onto stack slow-path 3014 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3015 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3016 // CALL directly to the runtime 3017 cbuf.set_insts_mark(); 3018 emit_opcode(cbuf,0xE8); // Call into runtime 3019 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3020 // Carry on here... 3021 %} 3022 3023 enc_class DPR2L_encoding( regDPR src ) %{ 3024 emit_opcode(cbuf,0xD9); // FLDCW trunc 3025 emit_opcode(cbuf,0x2D); 3026 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3027 // Allocate a word 3028 emit_opcode(cbuf,0x83); // SUB ESP,8 3029 emit_opcode(cbuf,0xEC); 3030 emit_d8(cbuf,0x08); 3031 // Encoding assumes a double has been pushed into FPR0. 3032 // Store down the double as a long, popping the FPU stack 3033 emit_opcode(cbuf,0xDF); // FISTP [ESP] 3034 emit_opcode(cbuf,0x3C); 3035 emit_d8(cbuf,0x24); 3036 // Restore the rounding mode; mask the exception 3037 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3038 emit_opcode(cbuf,0x2D); 3039 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3040 ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 3041 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3042 3043 // Load the converted int; adjust CPU stack 3044 emit_opcode(cbuf,0x58); // POP EAX 3045 emit_opcode(cbuf,0x5A); // POP EDX 3046 emit_opcode(cbuf,0x81); // CMP EDX,imm 3047 emit_d8 (cbuf,0xFA); // rdx 3048 emit_d32 (cbuf,0x80000000); // 0x80000000 3049 emit_opcode(cbuf,0x75); // JNE around_slow_call 3050 emit_d8 (cbuf,0x07+4); // Size of slow_call 3051 emit_opcode(cbuf,0x85); // TEST EAX,EAX 3052 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 3053 emit_opcode(cbuf,0x75); // JNE around_slow_call 3054 emit_d8 (cbuf,0x07); // Size of slow_call 3055 // Push src onto stack slow-path 3056 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3057 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3058 // CALL directly to the runtime 3059 cbuf.set_insts_mark(); 3060 emit_opcode(cbuf,0xE8); // Call into runtime 3061 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3062 // Carry on here... 
%}

// Multiply FPU stack top by register: FMUL ST,ST(i)  (opcode D8 C8+i).
// The left operand is assumed to already be on the x87 stack top.
enc_class FMul_ST_reg( eRegFPR src1 ) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FMUL   ST,$src  /* D8 C8+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC8 + $src1$$reg);
%}

// Add register into FPU stack top: FADD ST,ST(i)  (opcode D8 C0+i).
enc_class FAdd_ST_reg( eRegFPR src2 ) %{
  // FADD   ST,src2  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src2$$reg);
  //could use FADDP  src2,fpST  /* DE C0+i */
%}

// Add stack top into register and pop: FADDP ST(i),ST  (opcode DE C0+i).
enc_class FAddP_reg_ST( eRegFPR src2 ) %{
  // FADDP  src2,ST  /* DE C0+i */
  emit_opcode(cbuf, 0xDE);
  emit_opcode(cbuf, 0xC0 + $src2$$reg);
%}

// Fused subtract-then-divide on the FPU stack top:
//   ST = (ST - src1) / src2    (FSUB D8 E0+i, then FDIV D8 F0+i).
enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
  // Operand has been loaded into fp ST (stack top)
  // FSUB   ST,$src1
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xE0 + $src1$$reg);

  // FDIV
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xF0 + $src2$$reg);
%}

// Fused add-then-multiply: ST = (ST + src1) * src2.
enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FADD   ST,$src  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src1$$reg);

  // FMUL  ST,src2  /* D8 C*+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC8 + $src2$$reg);
%}

// As MulFAddF, but the multiply pops the stack into src2:
//   src2 = (ST + src1) * src2   (FMULP DE C8+i).
enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FADD   ST,$src  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src1$$reg);

  // FMULP  src2,ST  /* DE C8+i */
  emit_opcode(cbuf, 0xDE);
  emit_opcode(cbuf, 0xC8 + $src2$$reg);
%}

// Atomically load the volatile long
// Uses a 64-bit x87 FILD from $mem (DF /5) so the load is a single atomic
// 64-bit access, then spills it to the stack slot $dst with FISTP (DF /7).
enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
  emit_opcode(cbuf,0xDF);
  int rm_byte_opcode = 0x05;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc =
$mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
  // FILD the 64-bit source from its stack slot onto the x87 stack (DF /5) ...
  store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
  cbuf.set_insts_mark();   // Mark start of FIST in case $mem has an oop
  // ... then FISTP it to $mem (DF /7) as one atomic 64-bit store.
  emit_opcode(cbuf,0xDF);
  int rm_byte_opcode = 0x07;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

// Safepoint Poll.  This polls the safepoint page, and causes an
// exception if it is not readable.  Unfortunately, it kills the condition code
// in the process.
// We currently use TESTL [spp],EDI
// A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0

enc_class Safepoint_Poll() %{
  cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
  // TEST [polling_page],EDI  (85 /r with disp32-only addressing: mod=00, r/m=101)
  emit_opcode(cbuf,0x85);
  emit_rm (cbuf, 0x0, 0x7, 0x5);
  emit_d32(cbuf, (intptr_t)os::get_polling_page());
%}
%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   (to get allocators register number
//  G  Owned by    |        |    v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//        |        |        |  3
//        |        +--------+
//        V        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        |  locks |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by    +--------+
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.
// Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.
// Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word.
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    // Tables are indexed by ideal register number Op_RegN..Op_RegL;
    // lo/hi give the low and high halves of the returned value pair.
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    // NOTE(review): unlike c_return_value this tests UseSSE>=1 for floats --
    // Java float returns use XMM0 from SSE1 on; presumably intentional, confirm.
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required
// size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Signed 8-bit immediate (fits the imm8 field of many IA-32 instructions)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 16-bit immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift counts 1..31 (stay within one 32-bit word)
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift counts 32..63 (cross into the high word of a long)
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one (all bits set)
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value fits in a signed 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero (x87 form, UseSSE<=1)
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 form)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand
immD0() %{ 3594 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3595 // bug that generates code such that NaNs compare equal to 0.0 AND do not 3596 // compare equal to -0.0. 3597 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 3598 match(ConD); 3599 3600 format %{ %} 3601 interface(CONST_INTER); 3602 %} 3603 3604 // Float Immediate zero 3605 operand immFPR0() %{ 3606 predicate(UseSSE == 0 && n->getf() == 0.0F); 3607 match(ConF); 3608 3609 op_cost(5); 3610 format %{ %} 3611 interface(CONST_INTER); 3612 %} 3613 3614 // Float Immediate one 3615 operand immFPR1() %{ 3616 predicate(UseSSE == 0 && n->getf() == 1.0F); 3617 match(ConF); 3618 3619 op_cost(5); 3620 format %{ %} 3621 interface(CONST_INTER); 3622 %} 3623 3624 // Float Immediate 3625 operand immFPR() %{ 3626 predicate( UseSSE == 0 ); 3627 match(ConF); 3628 3629 op_cost(5); 3630 format %{ %} 3631 interface(CONST_INTER); 3632 %} 3633 3634 // Float Immediate 3635 operand immF() %{ 3636 predicate(UseSSE >= 1); 3637 match(ConF); 3638 3639 op_cost(5); 3640 format %{ %} 3641 interface(CONST_INTER); 3642 %} 3643 3644 // Float Immediate zero. 
Zero and not -0.0 3645 operand immF0() %{ 3646 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 3647 match(ConF); 3648 3649 op_cost(5); 3650 format %{ %} 3651 interface(CONST_INTER); 3652 %} 3653 3654 // Immediates for special shifts (sign extend) 3655 3656 // Constants for increment 3657 operand immI_16() %{ 3658 predicate( n->get_int() == 16 ); 3659 match(ConI); 3660 3661 format %{ %} 3662 interface(CONST_INTER); 3663 %} 3664 3665 operand immI_24() %{ 3666 predicate( n->get_int() == 24 ); 3667 match(ConI); 3668 3669 format %{ %} 3670 interface(CONST_INTER); 3671 %} 3672 3673 // Constant for byte-wide masking 3674 operand immI_255() %{ 3675 predicate( n->get_int() == 255 ); 3676 match(ConI); 3677 3678 format %{ %} 3679 interface(CONST_INTER); 3680 %} 3681 3682 // Constant for short-wide masking 3683 operand immI_65535() %{ 3684 predicate(n->get_int() == 65535); 3685 match(ConI); 3686 3687 format %{ %} 3688 interface(CONST_INTER); 3689 %} 3690 3691 // Register Operands 3692 // Integer Register 3693 operand rRegI() %{ 3694 constraint(ALLOC_IN_RC(int_reg)); 3695 match(RegI); 3696 match(xRegI); 3697 match(eAXRegI); 3698 match(eBXRegI); 3699 match(eCXRegI); 3700 match(eDXRegI); 3701 match(eDIRegI); 3702 match(eSIRegI); 3703 3704 format %{ %} 3705 interface(REG_INTER); 3706 %} 3707 3708 // Subset of Integer Register 3709 operand xRegI(rRegI reg) %{ 3710 constraint(ALLOC_IN_RC(int_x_reg)); 3711 match(reg); 3712 match(eAXRegI); 3713 match(eBXRegI); 3714 match(eCXRegI); 3715 match(eDXRegI); 3716 3717 format %{ %} 3718 interface(REG_INTER); 3719 %} 3720 3721 // Special Registers 3722 operand eAXRegI(xRegI reg) %{ 3723 constraint(ALLOC_IN_RC(eax_reg)); 3724 match(reg); 3725 match(rRegI); 3726 3727 format %{ "EAX" %} 3728 interface(REG_INTER); 3729 %} 3730 3731 // Special Registers 3732 operand eBXRegI(xRegI reg) %{ 3733 constraint(ALLOC_IN_RC(ebx_reg)); 3734 match(reg); 3735 match(rRegI); 3736 3737 format %{ "EBX" %} 3738 interface(REG_INTER); 3739 %} 3740 3741 operand 
eCXRegI(xRegI reg) %{ 3742 constraint(ALLOC_IN_RC(ecx_reg)); 3743 match(reg); 3744 match(rRegI); 3745 3746 format %{ "ECX" %} 3747 interface(REG_INTER); 3748 %} 3749 3750 operand eDXRegI(xRegI reg) %{ 3751 constraint(ALLOC_IN_RC(edx_reg)); 3752 match(reg); 3753 match(rRegI); 3754 3755 format %{ "EDX" %} 3756 interface(REG_INTER); 3757 %} 3758 3759 operand eDIRegI(xRegI reg) %{ 3760 constraint(ALLOC_IN_RC(edi_reg)); 3761 match(reg); 3762 match(rRegI); 3763 3764 format %{ "EDI" %} 3765 interface(REG_INTER); 3766 %} 3767 3768 operand naxRegI() %{ 3769 constraint(ALLOC_IN_RC(nax_reg)); 3770 match(RegI); 3771 match(eCXRegI); 3772 match(eDXRegI); 3773 match(eSIRegI); 3774 match(eDIRegI); 3775 3776 format %{ %} 3777 interface(REG_INTER); 3778 %} 3779 3780 operand nadxRegI() %{ 3781 constraint(ALLOC_IN_RC(nadx_reg)); 3782 match(RegI); 3783 match(eBXRegI); 3784 match(eCXRegI); 3785 match(eSIRegI); 3786 match(eDIRegI); 3787 3788 format %{ %} 3789 interface(REG_INTER); 3790 %} 3791 3792 operand ncxRegI() %{ 3793 constraint(ALLOC_IN_RC(ncx_reg)); 3794 match(RegI); 3795 match(eAXRegI); 3796 match(eDXRegI); 3797 match(eSIRegI); 3798 match(eDIRegI); 3799 3800 format %{ %} 3801 interface(REG_INTER); 3802 %} 3803 3804 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3805 // // 3806 operand eSIRegI(xRegI reg) %{ 3807 constraint(ALLOC_IN_RC(esi_reg)); 3808 match(reg); 3809 match(rRegI); 3810 3811 format %{ "ESI" %} 3812 interface(REG_INTER); 3813 %} 3814 3815 // Pointer Register 3816 operand anyRegP() %{ 3817 constraint(ALLOC_IN_RC(any_reg)); 3818 match(RegP); 3819 match(eAXRegP); 3820 match(eBXRegP); 3821 match(eCXRegP); 3822 match(eDIRegP); 3823 match(eRegP); 3824 3825 format %{ %} 3826 interface(REG_INTER); 3827 %} 3828 3829 operand eRegP() %{ 3830 constraint(ALLOC_IN_RC(int_reg)); 3831 match(RegP); 3832 match(eAXRegP); 3833 match(eBXRegP); 3834 match(eCXRegP); 3835 match(eDIRegP); 3836 3837 format %{ %} 3838 interface(REG_INTER); 3839 %} 3840 3841 // 
On windows95, EBP is not safe to use for implicit null tests. 3842 operand eRegP_no_EBP() %{ 3843 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3844 match(RegP); 3845 match(eAXRegP); 3846 match(eBXRegP); 3847 match(eCXRegP); 3848 match(eDIRegP); 3849 3850 op_cost(100); 3851 format %{ %} 3852 interface(REG_INTER); 3853 %} 3854 3855 operand naxRegP() %{ 3856 constraint(ALLOC_IN_RC(nax_reg)); 3857 match(RegP); 3858 match(eBXRegP); 3859 match(eDXRegP); 3860 match(eCXRegP); 3861 match(eSIRegP); 3862 match(eDIRegP); 3863 3864 format %{ %} 3865 interface(REG_INTER); 3866 %} 3867 3868 operand nabxRegP() %{ 3869 constraint(ALLOC_IN_RC(nabx_reg)); 3870 match(RegP); 3871 match(eCXRegP); 3872 match(eDXRegP); 3873 match(eSIRegP); 3874 match(eDIRegP); 3875 3876 format %{ %} 3877 interface(REG_INTER); 3878 %} 3879 3880 operand pRegP() %{ 3881 constraint(ALLOC_IN_RC(p_reg)); 3882 match(RegP); 3883 match(eBXRegP); 3884 match(eDXRegP); 3885 match(eSIRegP); 3886 match(eDIRegP); 3887 3888 format %{ %} 3889 interface(REG_INTER); 3890 %} 3891 3892 // Special Registers 3893 // Return a pointer value 3894 operand eAXRegP(eRegP reg) %{ 3895 constraint(ALLOC_IN_RC(eax_reg)); 3896 match(reg); 3897 format %{ "EAX" %} 3898 interface(REG_INTER); 3899 %} 3900 3901 // Used in AtomicAdd 3902 operand eBXRegP(eRegP reg) %{ 3903 constraint(ALLOC_IN_RC(ebx_reg)); 3904 match(reg); 3905 format %{ "EBX" %} 3906 interface(REG_INTER); 3907 %} 3908 3909 // Tail-call (interprocedural jump) to interpreter 3910 operand eCXRegP(eRegP reg) %{ 3911 constraint(ALLOC_IN_RC(ecx_reg)); 3912 match(reg); 3913 format %{ "ECX" %} 3914 interface(REG_INTER); 3915 %} 3916 3917 operand eSIRegP(eRegP reg) %{ 3918 constraint(ALLOC_IN_RC(esi_reg)); 3919 match(reg); 3920 format %{ "ESI" %} 3921 interface(REG_INTER); 3922 %} 3923 3924 // Used in rep stosw 3925 operand eDIRegP(eRegP reg) %{ 3926 constraint(ALLOC_IN_RC(edi_reg)); 3927 match(reg); 3928 format %{ "EDI" %} 3929 interface(REG_INTER); 3930 %} 3931 3932 operand eRegL() %{ 
3933 constraint(ALLOC_IN_RC(long_reg)); 3934 match(RegL); 3935 match(eADXRegL); 3936 3937 format %{ %} 3938 interface(REG_INTER); 3939 %} 3940 3941 operand eADXRegL( eRegL reg ) %{ 3942 constraint(ALLOC_IN_RC(eadx_reg)); 3943 match(reg); 3944 3945 format %{ "EDX:EAX" %} 3946 interface(REG_INTER); 3947 %} 3948 3949 operand eBCXRegL( eRegL reg ) %{ 3950 constraint(ALLOC_IN_RC(ebcx_reg)); 3951 match(reg); 3952 3953 format %{ "EBX:ECX" %} 3954 interface(REG_INTER); 3955 %} 3956 3957 // Special case for integer high multiply 3958 operand eADXRegL_low_only() %{ 3959 constraint(ALLOC_IN_RC(eadx_reg)); 3960 match(RegL); 3961 3962 format %{ "EAX" %} 3963 interface(REG_INTER); 3964 %} 3965 3966 // Flags register, used as output of compare instructions 3967 operand eFlagsReg() %{ 3968 constraint(ALLOC_IN_RC(int_flags)); 3969 match(RegFlags); 3970 3971 format %{ "EFLAGS" %} 3972 interface(REG_INTER); 3973 %} 3974 3975 // Flags register, used as output of FLOATING POINT compare instructions 3976 operand eFlagsRegU() %{ 3977 constraint(ALLOC_IN_RC(int_flags)); 3978 match(RegFlags); 3979 3980 format %{ "EFLAGS_U" %} 3981 interface(REG_INTER); 3982 %} 3983 3984 operand eFlagsRegUCF() %{ 3985 constraint(ALLOC_IN_RC(int_flags)); 3986 match(RegFlags); 3987 predicate(false); 3988 3989 format %{ "EFLAGS_U_CF" %} 3990 interface(REG_INTER); 3991 %} 3992 3993 // Condition Code Register used by long compare 3994 operand flagsReg_long_LTGE() %{ 3995 constraint(ALLOC_IN_RC(int_flags)); 3996 match(RegFlags); 3997 format %{ "FLAGS_LTGE" %} 3998 interface(REG_INTER); 3999 %} 4000 operand flagsReg_long_EQNE() %{ 4001 constraint(ALLOC_IN_RC(int_flags)); 4002 match(RegFlags); 4003 format %{ "FLAGS_EQNE" %} 4004 interface(REG_INTER); 4005 %} 4006 operand flagsReg_long_LEGT() %{ 4007 constraint(ALLOC_IN_RC(int_flags)); 4008 match(RegFlags); 4009 format %{ "FLAGS_LEGT" %} 4010 interface(REG_INTER); 4011 %} 4012 4013 // Float register operands 4014 operand regDPR() %{ 4015 predicate( UseSSE < 2 ); 
4016 constraint(ALLOC_IN_RC(fp_dbl_reg)); 4017 match(RegD); 4018 match(regDPR1); 4019 match(regDPR2); 4020 format %{ %} 4021 interface(REG_INTER); 4022 %} 4023 4024 operand regDPR1(regDPR reg) %{ 4025 predicate( UseSSE < 2 ); 4026 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4027 match(reg); 4028 format %{ "FPR1" %} 4029 interface(REG_INTER); 4030 %} 4031 4032 operand regDPR2(regDPR reg) %{ 4033 predicate( UseSSE < 2 ); 4034 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4035 match(reg); 4036 format %{ "FPR2" %} 4037 interface(REG_INTER); 4038 %} 4039 4040 operand regnotDPR1(regDPR reg) %{ 4041 predicate( UseSSE < 2 ); 4042 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4043 match(reg); 4044 format %{ %} 4045 interface(REG_INTER); 4046 %} 4047 4048 // Float register operands 4049 operand regFPR() %{ 4050 predicate( UseSSE < 2 ); 4051 constraint(ALLOC_IN_RC(fp_flt_reg)); 4052 match(RegF); 4053 match(regFPR1); 4054 format %{ %} 4055 interface(REG_INTER); 4056 %} 4057 4058 // Float register operands 4059 operand regFPR1(regFPR reg) %{ 4060 predicate( UseSSE < 2 ); 4061 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4062 match(reg); 4063 format %{ "FPR1" %} 4064 interface(REG_INTER); 4065 %} 4066 4067 // XMM Float register operands 4068 operand regF() %{ 4069 predicate( UseSSE>=1 ); 4070 constraint(ALLOC_IN_RC(float_reg_legacy)); 4071 match(RegF); 4072 format %{ %} 4073 interface(REG_INTER); 4074 %} 4075 4076 // XMM Double register operands 4077 operand regD() %{ 4078 predicate( UseSSE>=2 ); 4079 constraint(ALLOC_IN_RC(double_reg_legacy)); 4080 match(RegD); 4081 format %{ %} 4082 interface(REG_INTER); 4083 %} 4084 4085 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM) 4086 // runtime code generation via reg_class_dynamic. 
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
// (comment previously said "Plus Offset", but no offset is present here)
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" with out a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares; condition codes are deliberately
// swapped (l<->g, le<->ge) because the operands are compared in reverse order.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable-size instructions (x86); the
                                     // previous "Fixed size" comment was wrong
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation using big decoder
// (note: src is a memory operand here)
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation using big decoder
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any 4 decoders
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders (comment previously said 3)
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2); 4953 src : S5(read); 4954 mem : S3(read); 4955 DECODE : S0; // any decoder for FPU PUSH 4956 D0 : S1; // big decoder only 4957 FPU : S4; 4958 MEM : S3; // any mem 4959 %} 4960 4961 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4962 instruction_count(3); 4963 src1 : S3(read); 4964 src2 : S3(read); 4965 mem : S3(read); 4966 DECODE : S0(2); // any decoder for FPU PUSH 4967 D0 : S1; // big decoder only 4968 FPU : S4; 4969 MEM : S3; // any mem 4970 %} 4971 4972 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4973 instruction_count(3); 4974 src1 : S3(read); 4975 src2 : S3(read); 4976 mem : S4(read); 4977 DECODE : S0; // any decoder for FPU PUSH 4978 D0 : S0(2); // big decoder only 4979 FPU : S4; 4980 MEM : S3(2); // any mem 4981 %} 4982 4983 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4984 instruction_count(2); 4985 src1 : S3(read); 4986 dst : S4(read); 4987 D0 : S0(2); // big decoder only 4988 MEM : S3(2); // any mem 4989 %} 4990 4991 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4992 instruction_count(3); 4993 src1 : S3(read); 4994 src2 : S3(read); 4995 dst : S4(read); 4996 D0 : S0(3); // big decoder only 4997 FPU : S4; 4998 MEM : S3(3); // any mem 4999 %} 5000 5001 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 5002 instruction_count(3); 5003 src1 : S4(read); 5004 mem : S4(read); 5005 DECODE : S0; // any decoder for FPU PUSH 5006 D0 : S0(2); // big decoder only 5007 FPU : S4; 5008 MEM : S3(2); // any mem 5009 %} 5010 5011 // Float load constant 5012 pipe_class fpu_reg_con(regDPR dst) %{ 5013 instruction_count(2); 5014 dst : S5(write); 5015 D0 : S0; // big decoder only for the load 5016 DECODE : S1; // any decoder for FPU POP 5017 FPU : S4; 5018 MEM : S3; // any mem 5019 %} 5020 5021 // Float load constant 5022 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5023 instruction_count(3); 5024 dst : S5(write); 5025 src : S3(read); 5026 D0 : S0; // big decoder only for 
the load 5027 DECODE : S1(2); // any decoder for FPU POP 5028 FPU : S4; 5029 MEM : S3; // any mem 5030 %} 5031 5032 // UnConditional branch 5033 pipe_class pipe_jmp( label labl ) %{ 5034 single_instruction; 5035 BR : S3; 5036 %} 5037 5038 // Conditional branch 5039 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5040 single_instruction; 5041 cr : S1(read); 5042 BR : S3; 5043 %} 5044 5045 // Allocation idiom 5046 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5047 instruction_count(1); force_serialization; 5048 fixed_latency(6); 5049 heap_ptr : S3(read); 5050 DECODE : S0(3); 5051 D0 : S2; 5052 MEM : S3; 5053 ALU : S3(2); 5054 dst : S5(write); 5055 BR : S5; 5056 %} 5057 5058 // Generic big/slow expanded idiom 5059 pipe_class pipe_slow( ) %{ 5060 instruction_count(10); multiple_bundles; force_serialization; 5061 fixed_latency(100); 5062 D0 : S0(2); 5063 MEM : S3(2); 5064 %} 5065 5066 // The real do-nothing guy 5067 pipe_class empty( ) %{ 5068 instruction_count(0); 5069 %} 5070 5071 // Define the class for the Nop node 5072 define %{ 5073 MachNop = empty; 5074 %} 5075 5076 %} 5077 5078 //----------INSTRUCTIONS------------------------------------------------------- 5079 // 5080 // match -- States which machine-independent subtree may be replaced 5081 // by this instruction. 5082 // ins_cost -- The estimated cost of this instruction is used by instruction 5083 // selection to identify a minimum cost tree of machine 5084 // instructions that matches a tree of machine-independent 5085 // instructions. 5086 // format -- A string providing the disassembly for this instruction. 5087 // The value of an instruction's operand may be inserted 5088 // by referring to it with a '$' prefix. 5089 // opcode -- Three instruction opcodes may be provided. These are referred 5090 // to within an encode class as $primary, $secondary, and $tertiary 5091 // respectively. 
The primary opcode is commonly used to 5092 // indicate the type of machine instruction, while secondary 5093 // and tertiary are often used for prefix options or addressing 5094 // modes. 5095 // ins_encode -- A list of encode classes with parameters. The encode class 5096 // name must have been defined in an 'enc_class' specification 5097 // in the encode section of the architecture description. 5098 5099 //----------BSWAP-Instruction-------------------------------------------------- 5100 instruct bytes_reverse_int(rRegI dst) %{ 5101 match(Set dst (ReverseBytesI dst)); 5102 5103 format %{ "BSWAP $dst" %} 5104 opcode(0x0F, 0xC8); 5105 ins_encode( OpcP, OpcSReg(dst) ); 5106 ins_pipe( ialu_reg ); 5107 %} 5108 5109 instruct bytes_reverse_long(eRegL dst) %{ 5110 match(Set dst (ReverseBytesL dst)); 5111 5112 format %{ "BSWAP $dst.lo\n\t" 5113 "BSWAP $dst.hi\n\t" 5114 "XCHG $dst.lo $dst.hi" %} 5115 5116 ins_cost(125); 5117 ins_encode( bswap_long_bytes(dst) ); 5118 ins_pipe( ialu_reg_reg); 5119 %} 5120 5121 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5122 match(Set dst (ReverseBytesUS dst)); 5123 effect(KILL cr); 5124 5125 format %{ "BSWAP $dst\n\t" 5126 "SHR $dst,16\n\t" %} 5127 ins_encode %{ 5128 __ bswapl($dst$$Register); 5129 __ shrl($dst$$Register, 16); 5130 %} 5131 ins_pipe( ialu_reg ); 5132 %} 5133 5134 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5135 match(Set dst (ReverseBytesS dst)); 5136 effect(KILL cr); 5137 5138 format %{ "BSWAP $dst\n\t" 5139 "SAR $dst,16\n\t" %} 5140 ins_encode %{ 5141 __ bswapl($dst$$Register); 5142 __ sarl($dst$$Register, 16); 5143 %} 5144 ins_pipe( ialu_reg ); 5145 %} 5146 5147 5148 //---------- Zeros Count Instructions ------------------------------------------ 5149 5150 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5151 predicate(UseCountLeadingZerosInstruction); 5152 match(Set dst (CountLeadingZerosI src)); 5153 effect(KILL cr); 5154 5155 format %{ "LZCNT $dst, $src\t# count 
leading zeros (int)" %} 5156 ins_encode %{ 5157 __ lzcntl($dst$$Register, $src$$Register); 5158 %} 5159 ins_pipe(ialu_reg); 5160 %} 5161 5162 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5163 predicate(!UseCountLeadingZerosInstruction); 5164 match(Set dst (CountLeadingZerosI src)); 5165 effect(KILL cr); 5166 5167 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5168 "JNZ skip\n\t" 5169 "MOV $dst, -1\n" 5170 "skip:\n\t" 5171 "NEG $dst\n\t" 5172 "ADD $dst, 31" %} 5173 ins_encode %{ 5174 Register Rdst = $dst$$Register; 5175 Register Rsrc = $src$$Register; 5176 Label skip; 5177 __ bsrl(Rdst, Rsrc); 5178 __ jccb(Assembler::notZero, skip); 5179 __ movl(Rdst, -1); 5180 __ bind(skip); 5181 __ negl(Rdst); 5182 __ addl(Rdst, BitsPerInt - 1); 5183 %} 5184 ins_pipe(ialu_reg); 5185 %} 5186 5187 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5188 predicate(UseCountLeadingZerosInstruction); 5189 match(Set dst (CountLeadingZerosL src)); 5190 effect(TEMP dst, KILL cr); 5191 5192 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5193 "JNC done\n\t" 5194 "LZCNT $dst, $src.lo\n\t" 5195 "ADD $dst, 32\n" 5196 "done:" %} 5197 ins_encode %{ 5198 Register Rdst = $dst$$Register; 5199 Register Rsrc = $src$$Register; 5200 Label done; 5201 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5202 __ jccb(Assembler::carryClear, done); 5203 __ lzcntl(Rdst, Rsrc); 5204 __ addl(Rdst, BitsPerInt); 5205 __ bind(done); 5206 %} 5207 ins_pipe(ialu_reg); 5208 %} 5209 5210 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5211 predicate(!UseCountLeadingZerosInstruction); 5212 match(Set dst (CountLeadingZerosL src)); 5213 effect(TEMP dst, KILL cr); 5214 5215 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5216 "JZ msw_is_zero\n\t" 5217 "ADD $dst, 32\n\t" 5218 "JMP not_zero\n" 5219 "msw_is_zero:\n\t" 5220 "BSR $dst, $src.lo\n\t" 5221 "JNZ not_zero\n\t" 5222 "MOV $dst, -1\n" 5223 "not_zero:\n\t" 5224 "NEG 
$dst\n\t" 5225 "ADD $dst, 63\n" %} 5226 ins_encode %{ 5227 Register Rdst = $dst$$Register; 5228 Register Rsrc = $src$$Register; 5229 Label msw_is_zero; 5230 Label not_zero; 5231 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5232 __ jccb(Assembler::zero, msw_is_zero); 5233 __ addl(Rdst, BitsPerInt); 5234 __ jmpb(not_zero); 5235 __ bind(msw_is_zero); 5236 __ bsrl(Rdst, Rsrc); 5237 __ jccb(Assembler::notZero, not_zero); 5238 __ movl(Rdst, -1); 5239 __ bind(not_zero); 5240 __ negl(Rdst); 5241 __ addl(Rdst, BitsPerLong - 1); 5242 %} 5243 ins_pipe(ialu_reg); 5244 %} 5245 5246 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5247 predicate(UseCountTrailingZerosInstruction); 5248 match(Set dst (CountTrailingZerosI src)); 5249 effect(KILL cr); 5250 5251 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5252 ins_encode %{ 5253 __ tzcntl($dst$$Register, $src$$Register); 5254 %} 5255 ins_pipe(ialu_reg); 5256 %} 5257 5258 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5259 predicate(!UseCountTrailingZerosInstruction); 5260 match(Set dst (CountTrailingZerosI src)); 5261 effect(KILL cr); 5262 5263 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5264 "JNZ done\n\t" 5265 "MOV $dst, 32\n" 5266 "done:" %} 5267 ins_encode %{ 5268 Register Rdst = $dst$$Register; 5269 Label done; 5270 __ bsfl(Rdst, $src$$Register); 5271 __ jccb(Assembler::notZero, done); 5272 __ movl(Rdst, BitsPerInt); 5273 __ bind(done); 5274 %} 5275 ins_pipe(ialu_reg); 5276 %} 5277 5278 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5279 predicate(UseCountTrailingZerosInstruction); 5280 match(Set dst (CountTrailingZerosL src)); 5281 effect(TEMP dst, KILL cr); 5282 5283 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5284 "JNC done\n\t" 5285 "TZCNT $dst, $src.hi\n\t" 5286 "ADD $dst, 32\n" 5287 "done:" %} 5288 ins_encode %{ 5289 Register Rdst = $dst$$Register; 5290 Register Rsrc = $src$$Register; 5291 Label done; 5292 __ 
tzcntl(Rdst, Rsrc); 5293 __ jccb(Assembler::carryClear, done); 5294 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5295 __ addl(Rdst, BitsPerInt); 5296 __ bind(done); 5297 %} 5298 ins_pipe(ialu_reg); 5299 %} 5300 5301 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5302 predicate(!UseCountTrailingZerosInstruction); 5303 match(Set dst (CountTrailingZerosL src)); 5304 effect(TEMP dst, KILL cr); 5305 5306 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5307 "JNZ done\n\t" 5308 "BSF $dst, $src.hi\n\t" 5309 "JNZ msw_not_zero\n\t" 5310 "MOV $dst, 32\n" 5311 "msw_not_zero:\n\t" 5312 "ADD $dst, 32\n" 5313 "done:" %} 5314 ins_encode %{ 5315 Register Rdst = $dst$$Register; 5316 Register Rsrc = $src$$Register; 5317 Label msw_not_zero; 5318 Label done; 5319 __ bsfl(Rdst, Rsrc); 5320 __ jccb(Assembler::notZero, done); 5321 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5322 __ jccb(Assembler::notZero, msw_not_zero); 5323 __ movl(Rdst, BitsPerInt); 5324 __ bind(msw_not_zero); 5325 __ addl(Rdst, BitsPerInt); 5326 __ bind(done); 5327 %} 5328 ins_pipe(ialu_reg); 5329 %} 5330 5331 5332 //---------- Population Count Instructions ------------------------------------- 5333 5334 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5335 predicate(UsePopCountInstruction); 5336 match(Set dst (PopCountI src)); 5337 effect(KILL cr); 5338 5339 format %{ "POPCNT $dst, $src" %} 5340 ins_encode %{ 5341 __ popcntl($dst$$Register, $src$$Register); 5342 %} 5343 ins_pipe(ialu_reg); 5344 %} 5345 5346 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5347 predicate(UsePopCountInstruction); 5348 match(Set dst (PopCountI (LoadI mem))); 5349 effect(KILL cr); 5350 5351 format %{ "POPCNT $dst, $mem" %} 5352 ins_encode %{ 5353 __ popcntl($dst$$Register, $mem$$Address); 5354 %} 5355 ins_pipe(ialu_reg); 5356 %} 5357 5358 // Note: Long.bitCount(long) returns an int. 
5359 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 5360 predicate(UsePopCountInstruction); 5361 match(Set dst (PopCountL src)); 5362 effect(KILL cr, TEMP tmp, TEMP dst); 5363 5364 format %{ "POPCNT $dst, $src.lo\n\t" 5365 "POPCNT $tmp, $src.hi\n\t" 5366 "ADD $dst, $tmp" %} 5367 ins_encode %{ 5368 __ popcntl($dst$$Register, $src$$Register); 5369 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 5370 __ addl($dst$$Register, $tmp$$Register); 5371 %} 5372 ins_pipe(ialu_reg); 5373 %} 5374 5375 // Note: Long.bitCount(long) returns an int. 5376 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ 5377 predicate(UsePopCountInstruction); 5378 match(Set dst (PopCountL (LoadL mem))); 5379 effect(KILL cr, TEMP tmp, TEMP dst); 5380 5381 format %{ "POPCNT $dst, $mem\n\t" 5382 "POPCNT $tmp, $mem+4\n\t" 5383 "ADD $dst, $tmp" %} 5384 ins_encode %{ 5385 //__ popcntl($dst$$Register, $mem$$Address$$first); 5386 //__ popcntl($tmp$$Register, $mem$$Address$$second); 5387 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); 5388 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); 5389 __ addl($dst$$Register, $tmp$$Register); 5390 %} 5391 ins_pipe(ialu_reg); 5392 %} 5393 5394 5395 //----------Load/Store/Move Instructions--------------------------------------- 5396 //----------Load Instructions-------------------------------------------------- 5397 // Load Byte (8bit signed) 5398 instruct loadB(xRegI dst, memory mem) %{ 5399 match(Set dst (LoadB mem)); 5400 5401 ins_cost(125); 5402 format %{ "MOVSX8 $dst,$mem\t# byte" %} 5403 5404 ins_encode %{ 5405 __ movsbl($dst$$Register, $mem$$Address); 5406 %} 5407 5408 ins_pipe(ialu_reg_mem); 5409 %} 5410 5411 // Load Byte (8bit signed) into Long Register 5412 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5413 match(Set dst (ConvI2L (LoadB mem))); 5414 effect(KILL 
cr); 5415 5416 ins_cost(375); 5417 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5418 "MOV $dst.hi,$dst.lo\n\t" 5419 "SAR $dst.hi,7" %} 5420 5421 ins_encode %{ 5422 __ movsbl($dst$$Register, $mem$$Address); 5423 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5424 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5425 %} 5426 5427 ins_pipe(ialu_reg_mem); 5428 %} 5429 5430 // Load Unsigned Byte (8bit UNsigned) 5431 instruct loadUB(xRegI dst, memory mem) %{ 5432 match(Set dst (LoadUB mem)); 5433 5434 ins_cost(125); 5435 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5436 5437 ins_encode %{ 5438 __ movzbl($dst$$Register, $mem$$Address); 5439 %} 5440 5441 ins_pipe(ialu_reg_mem); 5442 %} 5443 5444 // Load Unsigned Byte (8 bit UNsigned) into Long Register 5445 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5446 match(Set dst (ConvI2L (LoadUB mem))); 5447 effect(KILL cr); 5448 5449 ins_cost(250); 5450 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5451 "XOR $dst.hi,$dst.hi" %} 5452 5453 ins_encode %{ 5454 Register Rdst = $dst$$Register; 5455 __ movzbl(Rdst, $mem$$Address); 5456 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5457 %} 5458 5459 ins_pipe(ialu_reg_mem); 5460 %} 5461 5462 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5463 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5464 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5465 effect(KILL cr); 5466 5467 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5468 "XOR $dst.hi,$dst.hi\n\t" 5469 "AND $dst.lo,right_n_bits($mask, 8)" %} 5470 ins_encode %{ 5471 Register Rdst = $dst$$Register; 5472 __ movzbl(Rdst, $mem$$Address); 5473 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5474 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5475 %} 5476 ins_pipe(ialu_reg_mem); 5477 %} 5478 5479 // Load Short (16bit signed) 5480 instruct loadS(rRegI 
dst, memory mem) %{ 5481 match(Set dst (LoadS mem)); 5482 5483 ins_cost(125); 5484 format %{ "MOVSX $dst,$mem\t# short" %} 5485 5486 ins_encode %{ 5487 __ movswl($dst$$Register, $mem$$Address); 5488 %} 5489 5490 ins_pipe(ialu_reg_mem); 5491 %} 5492 5493 // Load Short (16 bit signed) to Byte (8 bit signed) 5494 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5495 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5496 5497 ins_cost(125); 5498 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5499 ins_encode %{ 5500 __ movsbl($dst$$Register, $mem$$Address); 5501 %} 5502 ins_pipe(ialu_reg_mem); 5503 %} 5504 5505 // Load Short (16bit signed) into Long Register 5506 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5507 match(Set dst (ConvI2L (LoadS mem))); 5508 effect(KILL cr); 5509 5510 ins_cost(375); 5511 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5512 "MOV $dst.hi,$dst.lo\n\t" 5513 "SAR $dst.hi,15" %} 5514 5515 ins_encode %{ 5516 __ movswl($dst$$Register, $mem$$Address); 5517 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5518 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5519 %} 5520 5521 ins_pipe(ialu_reg_mem); 5522 %} 5523 5524 // Load Unsigned Short/Char (16bit unsigned) 5525 instruct loadUS(rRegI dst, memory mem) %{ 5526 match(Set dst (LoadUS mem)); 5527 5528 ins_cost(125); 5529 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5530 5531 ins_encode %{ 5532 __ movzwl($dst$$Register, $mem$$Address); 5533 %} 5534 5535 ins_pipe(ialu_reg_mem); 5536 %} 5537 5538 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5539 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5540 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5541 5542 ins_cost(125); 5543 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5544 ins_encode %{ 5545 __ movsbl($dst$$Register, $mem$$Address); 5546 %} 5547 ins_pipe(ialu_reg_mem); 5548 %} 5549 5550 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5551 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5552 match(Set dst (ConvI2L (LoadUS mem))); 5553 effect(KILL cr); 5554 5555 ins_cost(250); 5556 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5557 "XOR $dst.hi,$dst.hi" %} 5558 5559 ins_encode %{ 5560 __ movzwl($dst$$Register, $mem$$Address); 5561 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5562 %} 5563 5564 ins_pipe(ialu_reg_mem); 5565 %} 5566 5567 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5568 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5569 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5570 effect(KILL cr); 5571 5572 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5573 "XOR $dst.hi,$dst.hi" %} 5574 ins_encode %{ 5575 Register Rdst = $dst$$Register; 5576 __ movzbl(Rdst, $mem$$Address); 5577 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5578 %} 5579 ins_pipe(ialu_reg_mem); 5580 %} 5581 5582 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5583 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5584 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5585 effect(KILL cr); 5586 5587 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5588 "XOR $dst.hi,$dst.hi\n\t" 5589 "AND $dst.lo,right_n_bits($mask, 16)" %} 5590 ins_encode %{ 5591 Register Rdst = $dst$$Register; 5592 __ movzwl(Rdst, $mem$$Address); 5593 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5594 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5595 %} 5596 ins_pipe(ialu_reg_mem); 5597 %} 5598 5599 // Load Integer 5600 instruct loadI(rRegI dst, memory mem) %{ 5601 match(Set dst (LoadI mem)); 5602 5603 ins_cost(125); 5604 format %{ "MOV $dst,$mem\t# int" %} 5605 5606 ins_encode %{ 5607 __ movl($dst$$Register, $mem$$Address); 5608 %} 5609 5610 ins_pipe(ialu_reg_mem); 5611 %} 5612 5613 // Load Integer (32 bit signed) to Byte (8 bit signed) 5614 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5615 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5616 5617 ins_cost(125); 5618 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5619 ins_encode %{ 5620 __ movsbl($dst$$Register, $mem$$Address); 5621 %} 5622 ins_pipe(ialu_reg_mem); 5623 %} 5624 5625 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5626 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5627 match(Set dst (AndI (LoadI mem) mask)); 5628 5629 ins_cost(125); 5630 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5631 ins_encode %{ 5632 __ movzbl($dst$$Register, $mem$$Address); 5633 %} 5634 ins_pipe(ialu_reg_mem); 5635 %} 5636 5637 // Load Integer (32 bit signed) to Short (16 bit signed) 5638 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5639 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5640 5641 ins_cost(125); 5642 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5643 ins_encode %{ 5644 __ movswl($dst$$Register, $mem$$Address); 5645 %} 5646 ins_pipe(ialu_reg_mem); 5647 
%} 5648 5649 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5650 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5651 match(Set dst (AndI (LoadI mem) mask)); 5652 5653 ins_cost(125); 5654 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5655 ins_encode %{ 5656 __ movzwl($dst$$Register, $mem$$Address); 5657 %} 5658 ins_pipe(ialu_reg_mem); 5659 %} 5660 5661 // Load Integer into Long Register 5662 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5663 match(Set dst (ConvI2L (LoadI mem))); 5664 effect(KILL cr); 5665 5666 ins_cost(375); 5667 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5668 "MOV $dst.hi,$dst.lo\n\t" 5669 "SAR $dst.hi,31" %} 5670 5671 ins_encode %{ 5672 __ movl($dst$$Register, $mem$$Address); 5673 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5674 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5675 %} 5676 5677 ins_pipe(ialu_reg_mem); 5678 %} 5679 5680 // Load Integer with mask 0xFF into Long Register 5681 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5682 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5683 effect(KILL cr); 5684 5685 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5686 "XOR $dst.hi,$dst.hi" %} 5687 ins_encode %{ 5688 Register Rdst = $dst$$Register; 5689 __ movzbl(Rdst, $mem$$Address); 5690 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5691 %} 5692 ins_pipe(ialu_reg_mem); 5693 %} 5694 5695 // Load Integer with mask 0xFFFF into Long Register 5696 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5697 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5698 effect(KILL cr); 5699 5700 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5701 "XOR $dst.hi,$dst.hi" %} 5702 ins_encode %{ 5703 Register Rdst = $dst$$Register; 5704 __ movzwl(Rdst, $mem$$Address); 5705 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5706 %} 5707 ins_pipe(ialu_reg_mem); 
5708 %} 5709 5710 // Load Integer with 31-bit mask into Long Register 5711 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ 5712 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5713 effect(KILL cr); 5714 5715 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" 5716 "XOR $dst.hi,$dst.hi\n\t" 5717 "AND $dst.lo,$mask" %} 5718 ins_encode %{ 5719 Register Rdst = $dst$$Register; 5720 __ movl(Rdst, $mem$$Address); 5721 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5722 __ andl(Rdst, $mask$$constant); 5723 %} 5724 ins_pipe(ialu_reg_mem); 5725 %} 5726 5727 // Load Unsigned Integer into Long Register 5728 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ 5729 match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); 5730 effect(KILL cr); 5731 5732 ins_cost(250); 5733 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" 5734 "XOR $dst.hi,$dst.hi" %} 5735 5736 ins_encode %{ 5737 __ movl($dst$$Register, $mem$$Address); 5738 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5739 %} 5740 5741 ins_pipe(ialu_reg_mem); 5742 %} 5743 5744 // Load Long. Cannot clobber address while loading, so restrict address 5745 // register to ESI 5746 instruct loadL(eRegL dst, load_long_memory mem) %{ 5747 predicate(!((LoadLNode*)n)->require_atomic_access()); 5748 match(Set dst (LoadL mem)); 5749 5750 ins_cost(250); 5751 format %{ "MOV $dst.lo,$mem\t# long\n\t" 5752 "MOV $dst.hi,$mem+4" %} 5753 5754 ins_encode %{ 5755 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); 5756 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); 5757 __ movl($dst$$Register, Amemlo); 5758 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); 5759 %} 5760 5761 ins_pipe(ialu_reg_long_mem); 5762 %} 5763 5764 // Volatile Load Long. Must be atomic, so do 64-bit FILD 5765 // then store it down to the stack and reload on the int 5766 // side. 
5767 instruct loadL_volatile(stackSlotL dst, memory mem) %{ 5768 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 5769 match(Set dst (LoadL mem)); 5770 5771 ins_cost(200); 5772 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 5773 "FISTp $dst" %} 5774 ins_encode(enc_loadL_volatile(mem,dst)); 5775 ins_pipe( fpu_reg_mem ); 5776 %} 5777 5778 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 5779 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5780 match(Set dst (LoadL mem)); 5781 effect(TEMP tmp); 5782 ins_cost(180); 5783 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5784 "MOVSD $dst,$tmp" %} 5785 ins_encode %{ 5786 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5787 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 5788 %} 5789 ins_pipe( pipe_slow ); 5790 %} 5791 5792 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 5793 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5794 match(Set dst (LoadL mem)); 5795 effect(TEMP tmp); 5796 ins_cost(160); 5797 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5798 "MOVD $dst.lo,$tmp\n\t" 5799 "PSRLQ $tmp,32\n\t" 5800 "MOVD $dst.hi,$tmp" %} 5801 ins_encode %{ 5802 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5803 __ movdl($dst$$Register, $tmp$$XMMRegister); 5804 __ psrlq($tmp$$XMMRegister, 32); 5805 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 5806 %} 5807 ins_pipe( pipe_slow ); 5808 %} 5809 5810 // Load Range 5811 instruct loadRange(rRegI dst, memory mem) %{ 5812 match(Set dst (LoadRange mem)); 5813 5814 ins_cost(125); 5815 format %{ "MOV $dst,$mem" %} 5816 opcode(0x8B); 5817 ins_encode( OpcP, RegMem(dst,mem)); 5818 ins_pipe( ialu_reg_mem ); 5819 %} 5820 5821 5822 // Load Pointer 5823 instruct loadP(eRegP dst, memory mem) %{ 5824 match(Set dst (LoadP mem)); 5825 5826 ins_cost(125); 5827 format %{ "MOV $dst,$mem" %} 5828 opcode(0x8B); 5829 ins_encode( OpcP, RegMem(dst,mem)); 5830 ins_pipe( 
ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 path: load onto FPU stack, then pop into $dst)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Double to XMM when UseXmmLoadAndClearUpper is off
// (format advertises MOVLPD; macro-assembler movdbl picks the form).
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 path)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Effective Address: base + 32-bit offset
instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Effective Address: base + index + offset
instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Effective Address: base + scaled index
instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Effective Address: base + scaled index + offset
instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero (XOR clobbers EFLAGS, hence KILL cr)
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

// Load Pointer Constant
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Long Constant (two 32-bit immediate moves for lo/hi halves)
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load Long zero (XOR both halves; clobbers EFLAGS)
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Long from stack slot (two 32-bit moves for lo/hi halves)
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long (non-atomic case only; two 32-bit moves)
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer (only the low half is stored)
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// Volatile long store via a single 64-bit XMM move (SSE2),
// source already spilled to a stack slot.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Volatile long store via XMM directly from a long register pair:
// pack lo/hi halves into one XMM register, then one 64-bit store.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move (no CMOV support: emulate with a short branch around a MOV)
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned variant of the branch-emulated conditional move above
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move using the real CMOV instruction (guarded by supports_cmov)
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF flags variant; expands to cmovI_regU
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// UCF flags variant; expands to cmovI_memU
instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF flags variant; expands to cmovP_regU
instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
6869 ins_encode( enc_cmov_dpr(cop,src) ); 6870 ins_pipe( pipe_cmovDPR_reg ); 6871 %} 6872 6873 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6874 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ 6875 predicate(UseSSE<=1); 6876 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6877 ins_cost(200); 6878 format %{ "Jn$cop skip\n\t" 6879 "MOV $dst,$src\t# double\n" 6880 "skip:" %} 6881 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6882 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); 6883 ins_pipe( pipe_cmovDPR_reg ); 6884 %} 6885 6886 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6887 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ 6888 predicate(UseSSE==0); 6889 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6890 ins_cost(200); 6891 format %{ "Jn$cop skip\n\t" 6892 "MOV $dst,$src\t# float\n" 6893 "skip:" %} 6894 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6895 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); 6896 ins_pipe( pipe_cmovDPR_reg ); 6897 %} 6898 6899 // No CMOVE with SSE/SSE2 6900 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 6901 predicate (UseSSE>=1); 6902 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6903 ins_cost(200); 6904 format %{ "Jn$cop skip\n\t" 6905 "MOVSS $dst,$src\t# float\n" 6906 "skip:" %} 6907 ins_encode %{ 6908 Label skip; 6909 // Invert sense of branch from sense of CMOV 6910 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6911 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6912 __ bind(skip); 6913 %} 6914 ins_pipe( pipe_slow ); 6915 %} 6916 6917 // No CMOVE with SSE/SSE2 6918 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 6919 predicate (UseSSE>=2); 6920 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6921 ins_cost(200); 6922 format %{ "Jn$cop skip\n\t" 6923 "MOVSD $dst,$src\t# 
float\n" 6924 "skip:" %} 6925 ins_encode %{ 6926 Label skip; 6927 // Invert sense of branch from sense of CMOV 6928 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6929 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6930 __ bind(skip); 6931 %} 6932 ins_pipe( pipe_slow ); 6933 %} 6934 6935 // unsigned version 6936 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 6937 predicate (UseSSE>=1); 6938 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6939 ins_cost(200); 6940 format %{ "Jn$cop skip\n\t" 6941 "MOVSS $dst,$src\t# float\n" 6942 "skip:" %} 6943 ins_encode %{ 6944 Label skip; 6945 // Invert sense of branch from sense of CMOV 6946 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6947 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6948 __ bind(skip); 6949 %} 6950 ins_pipe( pipe_slow ); 6951 %} 6952 6953 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 6954 predicate (UseSSE>=1); 6955 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6956 ins_cost(200); 6957 expand %{ 6958 fcmovF_regU(cop, cr, dst, src); 6959 %} 6960 %} 6961 6962 // unsigned version 6963 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 6964 predicate (UseSSE>=2); 6965 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6966 ins_cost(200); 6967 format %{ "Jn$cop skip\n\t" 6968 "MOVSD $dst,$src\t# float\n" 6969 "skip:" %} 6970 ins_encode %{ 6971 Label skip; 6972 // Invert sense of branch from sense of CMOV 6973 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6974 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6975 __ bind(skip); 6976 %} 6977 ins_pipe( pipe_slow ); 6978 %} 6979 6980 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 6981 predicate (UseSSE>=2); 6982 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6983 ins_cost(200); 6984 expand %{ 6985 fcmovD_regU(cop, cr, dst, src); 6986 %} 6987 %} 6988 6989 instruct cmovL_reg(cmpOp cop, 
eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Long conditional move, unsigned flags: one CMOV per 32-bit half
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// UCF flags variant; expands to cmovL_regU
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Integer addition with an immediate operand
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add 1: use the one-byte INC form when UseIncDec is enabled
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Non-destructive add via LEA (does not touch EFLAGS, hence no KILL cr)
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Non-destructive pointer add via LEA
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add -1: use the one-byte DEC form when UseIncDec is enabled
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Pointer addition (register operand)
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer addition (immediate operand)
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Integer addition with a memory operand
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src"
%} 7117 opcode(0x03); 7118 ins_encode( OpcP, RegMem( dst, src) ); 7119 ins_pipe( ialu_reg_mem ); 7120 %} 7121 7122 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7123 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7124 effect(KILL cr); 7125 7126 ins_cost(150); 7127 format %{ "ADD $dst,$src" %} 7128 opcode(0x01); /* Opcode 01 /r */ 7129 ins_encode( OpcP, RegMem( src, dst ) ); 7130 ins_pipe( ialu_mem_reg ); 7131 %} 7132 7133 // Add Memory with Immediate 7134 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7135 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7136 effect(KILL cr); 7137 7138 ins_cost(125); 7139 format %{ "ADD $dst,$src" %} 7140 opcode(0x81); /* Opcode 81 /0 id */ 7141 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7142 ins_pipe( ialu_mem_imm ); 7143 %} 7144 7145 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7146 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7147 effect(KILL cr); 7148 7149 ins_cost(125); 7150 format %{ "INC $dst" %} 7151 opcode(0xFF); /* Opcode FF /0 */ 7152 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7153 ins_pipe( ialu_mem_imm ); 7154 %} 7155 7156 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7157 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7158 effect(KILL cr); 7159 7160 ins_cost(125); 7161 format %{ "DEC $dst" %} 7162 opcode(0xFF); /* Opcode FF /1 */ 7163 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7164 ins_pipe( ialu_mem_imm ); 7165 %} 7166 7167 7168 instruct checkCastPP( eRegP dst ) %{ 7169 match(Set dst (CheckCastPP dst)); 7170 7171 size(0); 7172 format %{ "#checkcastPP of $dst" %} 7173 ins_encode( /*empty encoding*/ ); 7174 ins_pipe( empty ); 7175 %} 7176 7177 instruct castPP( eRegP dst ) %{ 7178 match(Set dst (CastPP dst)); 7179 format %{ "#castPP of $dst" %} 7180 ins_encode( /*empty encoding*/ ); 7181 ins_pipe( empty ); 7182 %} 7183 7184 instruct castII( rRegI dst ) %{ 7185 match(Set dst (CastII dst)); 7186 format %{ "#castII of $dst" %} 
7187 ins_encode( /*empty encoding*/ ); 7188 ins_cost(0); 7189 ins_pipe( empty ); 7190 %} 7191 7192 7193 // Load-locked - same as a regular pointer load when used with compare-swap 7194 instruct loadPLocked(eRegP dst, memory mem) %{ 7195 match(Set dst (LoadPLocked mem)); 7196 7197 ins_cost(125); 7198 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7199 opcode(0x8B); 7200 ins_encode( OpcP, RegMem(dst,mem)); 7201 ins_pipe( ialu_reg_mem ); 7202 %} 7203 7204 // Conditional-store of the updated heap-top. 7205 // Used during allocation of the shared heap. 7206 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7207 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7208 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7209 // EAX is killed if there is contention, but then it's also unused. 7210 // In the common case of no contention, EAX holds the new oop address. 7211 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7212 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7213 ins_pipe( pipe_cmpxchg ); 7214 %} 7215 7216 // Conditional-store of an int value. 7217 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 7218 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ 7219 match(Set cr (StoreIConditional mem (Binary oldval newval))); 7220 effect(KILL oldval); 7221 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} 7222 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); 7223 ins_pipe( pipe_cmpxchg ); 7224 %} 7225 7226 // Conditional-store of a long value. 7227 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 
7228 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7229 match(Set cr (StoreLConditional mem (Binary oldval newval))); 7230 effect(KILL oldval); 7231 format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t" 7232 "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t" 7233 "XCHG EBX,ECX" 7234 %} 7235 ins_encode %{ 7236 // Note: we need to swap rbx, and rcx before and after the 7237 // cmpxchg8 instruction because the instruction uses 7238 // rcx as the high order word of the new value to store but 7239 // our register encoding uses rbx. 7240 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7241 if( os::is_MP() ) 7242 __ lock(); 7243 __ cmpxchg8($mem$$Address); 7244 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7245 %} 7246 ins_pipe( pipe_cmpxchg ); 7247 %} 7248 7249 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7250 7251 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7252 predicate(VM_Version::supports_cx8()); 7253 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7254 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); 7255 effect(KILL cr, KILL oldval); 7256 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7257 "MOV $res,0\n\t" 7258 "JNE,s fail\n\t" 7259 "MOV $res,1\n" 7260 "fail:" %} 7261 ins_encode( enc_cmpxchg8(mem_ptr), 7262 enc_flags_ne_to_boolean(res) ); 7263 ins_pipe( pipe_cmpxchg ); 7264 %} 7265 7266 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7267 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7268 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); 7269 effect(KILL cr, KILL oldval); 7270 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7271 "MOV 
$res,0\n\t" 7272 "JNE,s fail\n\t" 7273 "MOV $res,1\n" 7274 "fail:" %} 7275 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7276 ins_pipe( pipe_cmpxchg ); 7277 %} 7278 7279 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7280 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7281 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); 7282 effect(KILL cr, KILL oldval); 7283 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7284 "MOV $res,0\n\t" 7285 "JNE,s fail\n\t" 7286 "MOV $res,1\n" 7287 "fail:" %} 7288 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7289 ins_pipe( pipe_cmpxchg ); 7290 %} 7291 7292 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7293 predicate(VM_Version::supports_cx8()); 7294 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval))); 7295 effect(KILL cr); 7296 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7297 ins_encode( enc_cmpxchg8(mem_ptr) ); 7298 ins_pipe( pipe_cmpxchg ); 7299 %} 7300 7301 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7302 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); 7303 effect(KILL cr); 7304 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7305 ins_encode( enc_cmpxchg(mem_ptr) ); 7306 ins_pipe( pipe_cmpxchg ); 7307 %} 7308 7309 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7310 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval))); 7311 effect(KILL cr); 7312 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7313 ins_encode( enc_cmpxchg(mem_ptr) ); 7314 ins_pipe( pipe_cmpxchg ); 7315 %} 7316 
// Atomic add-to-memory when the fetched result is unused: a plain locked ADD
// is cheaper than XADD.  `dummy` absorbs the unused GetAndAddI result.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    // LOCK prefix is only required on multiprocessor systems.
    if (os::is_MP()) { __ lock(); }
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add: XADD leaves the previous memory value in $newval.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange of an int.  No explicit LOCK prefix is emitted here:
// XCHG with a memory operand asserts the bus lock implicitly on x86.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange of a pointer; on 32-bit x86 this is the same encoding
// as the int form.
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
// Subtract Register from Register.
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);                       // SUB r32, r/m32
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract Immediate from Register (8-bit immediates use the sign-extended
// short form selected by OpcSErm/Con8or32).
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05);  /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}
// Subtract Memory operand from Register.
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);                       // SUB r32, r/m32
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Subtract Register from Memory (read-modify-write form).
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29);  /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer: matches AddP of a negated int offset.
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Integer negation: 0 - dst is strength-reduced to NEG.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03);  // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);                 // IMUL r32, r/m32 (two-byte 0F AF)
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate (three-operand IMUL r32, r/m32, imm).
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low half (EAX) of an EDX:EAX long pair;
// paired with the mulI_imm_high rules below which only consume the low word.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);                       // MOV r32, imm32
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
// The _kids predicate walks the matched subtree to require that the long
// constant actually fits in 32 signed bits.
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (general shift counts 32..63; the residual shift becomes a SAR on EDX).
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long (one-operand IMUL: EDX:EAX = EAX * src1).
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned int-to-long multiply: MUL form of the rule above.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long (general case: three 32x32 multiplies plus adds).
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    // tmp = y_hi * x_lo; then EDX:EAX = x_lo * y_lo; fold tmp into EDX.
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    // tmp = y_lo * x_hi; then EDX:EAX = x_lo * y_lo; fold tmp into EDX.
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register.  The min_jint / -1 pre-check avoids the #DE
// overflow trap IDIV would raise for 0x80000000 / -1.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long: punts to the SharedRuntime::ldiv runtime helper.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
// (quotient in EAX, remainder in EDX from the single IDIV).
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register (remainder lands in EDX).
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long: punts to the SharedRuntime::lrem runtime helper.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
// Long / imm32: done with two chained unsigned 32-bit DIVs on |imm|,
// with sign fix-ups before and after for a negative dividend/divisor.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;          // pcon = |imm|
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      // Divisor was negative: negate the quotient.
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;          // pcon = |imm|
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    // Remainder sits in EDX; move it to EAX and sign-extend into EDX.
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4);  /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable (count in CL)
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4);  /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one (memory operand, read-modify-write)
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  // NOTE(review): declares ialu_mem_imm although the operand is a register
  // (sibling register shifts use ialu_reg) — confirm whether intentional.
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate (memory operand)
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable (count in CL)
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Matched to a single sign-extending byte move.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
// Matched to a single MOVSX (movswl); see the i2b idiom above for bytes.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable (count implicitly in CL via eCXRegI)
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
// OpcSErm/Con8or32 pick the sign-extended imm8 form when the constant fits,
// else the full imm32 form.
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register (read-modify-write on the memory operand)
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate (read-modify-write on the memory operand)
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: matches (~src1) & src2, expressed in ideal form as
// (AndI (XorI src1 -1) src2). Guarded by UseBMI1Instructions.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: matches (-src) & src, i.e. isolate lowest set bit.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: matches (src - 1) ^ src, i.e. mask up to lowest set bit.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: matches (src - 1) & src, i.e. reset lowest set bit.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer reinterpreted as an int (CastP2X)
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register (read-modify-write on the memory operand)
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate (read-modify-write on the memory operand)
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// The three instructs below carry no match() rule: they are pure expansion
// targets, instantiated only from the expand %{ %} blocks of the rol/ror
// matchers further down.
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

// Variable rotate: count in CL, so dst must avoid ECX (ncxRegI).
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate requires the two shift counts to sum to 0 mod 32, the condition
// under which (x << l) | (x >>> r) is a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once (complement count written as 32 - shift)
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
// Like the ROL expand instructs above: no match() rule, expansion-only.
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once (shift counts must sum to 0 mod 32 — see predicate)
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once (complement count written as 32 - shift)
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1 — strength-reduced to NOT, which
// (unlike XOR) leaves the flags alone, hence no KILL cr here.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register (read-modify-write on the memory operand)
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate (read-modify-write on the memory operand)
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------
// Conv2B is implemented by expansion into a copy followed by NEG/ADC:
// NEG sets the carry flag iff the value is non-zero, and ADC dst,src then
// leaves 0 or 1 in dst (see ci2b/cp2b below).

instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );  // 0x13 = ADC r32, r/m32
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer flavor of the same expansion.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// Produce -1 if p < q (signed), else 0: XOR/CMP/SETlt/NEG sequence.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    Label done;  // NOTE(review): declared but never bound or jumped to
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: the sign bit replicated by SAR 31 gives the mask.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// p + ((p < q ? -1 : 0) & y) + (p - q) pattern, emitted as SUB + conditional ADD.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// y &= (p < q ? -1 : 0): keep y when p < q, otherwise clear it.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
   instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These produce only the condition flags (Set cr ...); the arithmetic result
// register is either clobbered (USE_KILL) or written to a TEMP.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtract-overflow checks can use CMP, which sets the same flags as SUB
// without destroying op1.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// (0 - op2) overflow check as a single NEG; op2 is destroyed.
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Immediate form writes the product into a TEMP so neither input is killed.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// 64-bit values live in register pairs on ia32 ($dst.lo / $dst.hi); adds and
// subtracts pair the low-word op with the carry-propagating op on the high
// word (ADD/ADC, SUB/SBB).

// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory (high word loaded from $mem+4)
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory (high word from $mem+4)
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Long negation: 0 - dst, done as NEG/NEG/SBB across the register pair.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register — logical ops need no carry, so the
// lo and hi halves are independent.
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory (high word from $mem+4)
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// 64-bit ANDN is synthesized from two 32-bit ANDNL ops on the lo/hi halves;
// HIGH_FROM_LOW maps a pair's low register to its high partner.
// TEMP dst keeps dst from being allocated on top of an input it would
// clobber between the two halves.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Re-derive the address of the high word at disp+4.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSI (isolate lowest set bit): if the low half has any bit set
// (BLSIL leaves ZF clear -> JNZ), the high half of the result is 0;
// otherwise apply BLSIL to the high half.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSMSK (mask up to lowest set bit): carry from BLSMSKL on the low
// half (JNC) tells whether the mask extends into the high half.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSR (reset lowest set bit): high half is copied unchanged unless
// the low-half BLSRL signals (via carry) that the bit to clear is above it.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory (high word from $mem+4)
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1 — strength-reduced to two NOTs,
// which leave the flags alone (no KILL cr), mirroring xorI_eReg_im1.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory (high word from $mem+4)
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1 — one ADD/ADC doubling of the pair
// (guarded by the UseNewLongLShift flag).
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2 — two ADD/ADC doublings
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3 — three ADD/ADC doublings
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31 — SHLD carries bits from lo into hi, then SHL lo.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63 — lo moves wholesale into hi, lo becomes zero.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
          "\tSHL $dst.hi,$cnt-32\n"
          "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable: test bit 5 of the count to pick the
// >=32 path (move lo to hi, clear lo) before the SHLD/SHL pair.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31 — SHRD carries bits from hi into lo, then SHR hi.
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63 — hi moves wholesale into lo, hi becomes zero.
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
          "\tSHR $dst.lo,$cnt-32\n"
          "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable: mirror of salL_eReg_CL for the
// logical-right direction (>=32 path moves hi into lo and clears hi).
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode(
shift_right_long( dst, shift ) ); 9241 ins_pipe( pipe_slow ); 9242 %} 9243 9244 // Shift Right Long by 1-31 9245 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9246 match(Set dst (RShiftL dst cnt)); 9247 effect(KILL cr); 9248 ins_cost(200); 9249 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9250 "SAR $dst.hi,$cnt" %} 9251 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9252 ins_encode( move_long_small_shift(dst,cnt) ); 9253 ins_pipe( ialu_reg_long ); 9254 %} 9255 9256 // Shift Right Long by 32-63 9257 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9258 match(Set dst (RShiftL dst cnt)); 9259 effect(KILL cr); 9260 ins_cost(300); 9261 format %{ "MOV $dst.lo,$dst.hi\n" 9262 "\tSAR $dst.lo,$cnt-32\n" 9263 "\tSAR $dst.hi,31" %} 9264 opcode(0xC1, 0x7); /* C1 /7 ib */ 9265 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9266 ins_pipe( ialu_reg_long ); 9267 %} 9268 9269 // Shift Right arithmetic Long by variable 9270 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9271 match(Set dst (RShiftL dst shift)); 9272 effect(KILL cr); 9273 ins_cost(600); 9274 size(18); 9275 format %{ "TEST $shift,32\n\t" 9276 "JEQ,s small\n\t" 9277 "MOV $dst.lo,$dst.hi\n\t" 9278 "SAR $dst.hi,31\n" 9279 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9280 "SAR $dst.hi,$shift" %} 9281 ins_encode( shift_right_arith_long( dst, shift ) ); 9282 ins_pipe( pipe_slow ); 9283 %} 9284 9285 9286 //----------Double Instructions------------------------------------------------ 9287 // Double Math 9288 9289 // Compare & branch 9290 9291 // P6 version of float compare, sets condition codes in EFLAGS 9292 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9293 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9294 match(Set cr (CmpD src1 src2)); 9295 effect(KILL rax); 9296 ins_cost(150); 9297 format %{ "FLD $src1\n\t" 9298 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9299 "JNP exit\n\t" 9300 "MOV ah,1 // saw a NaN, set CF\n\t" 9301 
"SAHF\n" 9302 "exit:\tNOP // avoid branch to branch" %} 9303 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9304 ins_encode( Push_Reg_DPR(src1), 9305 OpcP, RegOpc(src2), 9306 cmpF_P6_fixup ); 9307 ins_pipe( pipe_slow ); 9308 %} 9309 9310 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9311 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9312 match(Set cr (CmpD src1 src2)); 9313 ins_cost(150); 9314 format %{ "FLD $src1\n\t" 9315 "FUCOMIP ST,$src2 // P6 instruction" %} 9316 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9317 ins_encode( Push_Reg_DPR(src1), 9318 OpcP, RegOpc(src2)); 9319 ins_pipe( pipe_slow ); 9320 %} 9321 9322 // Compare & branch 9323 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9324 predicate(UseSSE<=1); 9325 match(Set cr (CmpD src1 src2)); 9326 effect(KILL rax); 9327 ins_cost(200); 9328 format %{ "FLD $src1\n\t" 9329 "FCOMp $src2\n\t" 9330 "FNSTSW AX\n\t" 9331 "TEST AX,0x400\n\t" 9332 "JZ,s flags\n\t" 9333 "MOV AH,1\t# unordered treat as LT\n" 9334 "flags:\tSAHF" %} 9335 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9336 ins_encode( Push_Reg_DPR(src1), 9337 OpcP, RegOpc(src2), 9338 fpu_flags); 9339 ins_pipe( pipe_slow ); 9340 %} 9341 9342 // Compare vs zero into -1,0,1 9343 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9344 predicate(UseSSE<=1); 9345 match(Set dst (CmpD3 src1 zero)); 9346 effect(KILL cr, KILL rax); 9347 ins_cost(280); 9348 format %{ "FTSTD $dst,$src1" %} 9349 opcode(0xE4, 0xD9); 9350 ins_encode( Push_Reg_DPR(src1), 9351 OpcS, OpcP, PopFPU, 9352 CmpF_Result(dst)); 9353 ins_pipe( pipe_slow ); 9354 %} 9355 9356 // Compare into -1,0,1 9357 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9358 predicate(UseSSE<=1); 9359 match(Set dst (CmpD3 src1 src2)); 9360 effect(KILL cr, KILL rax); 9361 ins_cost(300); 9362 format %{ "FCMPD $dst,$src1,$src2" %} 9363 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9364 ins_encode( 
Push_Reg_DPR(src1), 9365 OpcP, RegOpc(src2), 9366 CmpF_Result(dst)); 9367 ins_pipe( pipe_slow ); 9368 %} 9369 9370 // float compare and set condition codes in EFLAGS by XMM regs 9371 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9372 predicate(UseSSE>=2); 9373 match(Set cr (CmpD src1 src2)); 9374 ins_cost(145); 9375 format %{ "UCOMISD $src1,$src2\n\t" 9376 "JNP,s exit\n\t" 9377 "PUSHF\t# saw NaN, set CF\n\t" 9378 "AND [rsp], #0xffffff2b\n\t" 9379 "POPF\n" 9380 "exit:" %} 9381 ins_encode %{ 9382 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9383 emit_cmpfp_fixup(_masm); 9384 %} 9385 ins_pipe( pipe_slow ); 9386 %} 9387 9388 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9389 predicate(UseSSE>=2); 9390 match(Set cr (CmpD src1 src2)); 9391 ins_cost(100); 9392 format %{ "UCOMISD $src1,$src2" %} 9393 ins_encode %{ 9394 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9395 %} 9396 ins_pipe( pipe_slow ); 9397 %} 9398 9399 // float compare and set condition codes in EFLAGS by XMM regs 9400 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9401 predicate(UseSSE>=2); 9402 match(Set cr (CmpD src1 (LoadD src2))); 9403 ins_cost(145); 9404 format %{ "UCOMISD $src1,$src2\n\t" 9405 "JNP,s exit\n\t" 9406 "PUSHF\t# saw NaN, set CF\n\t" 9407 "AND [rsp], #0xffffff2b\n\t" 9408 "POPF\n" 9409 "exit:" %} 9410 ins_encode %{ 9411 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9412 emit_cmpfp_fixup(_masm); 9413 %} 9414 ins_pipe( pipe_slow ); 9415 %} 9416 9417 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9418 predicate(UseSSE>=2); 9419 match(Set cr (CmpD src1 (LoadD src2))); 9420 ins_cost(100); 9421 format %{ "UCOMISD $src1,$src2" %} 9422 ins_encode %{ 9423 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9424 %} 9425 ins_pipe( pipe_slow ); 9426 %} 9427 9428 // Compare into -1,0,1 in XMM 9429 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9430 predicate(UseSSE>=2); 9431 match(Set dst (CmpD3 src1 src2)); 
9432 effect(KILL cr); 9433 ins_cost(255); 9434 format %{ "UCOMISD $src1, $src2\n\t" 9435 "MOV $dst, #-1\n\t" 9436 "JP,s done\n\t" 9437 "JB,s done\n\t" 9438 "SETNE $dst\n\t" 9439 "MOVZB $dst, $dst\n" 9440 "done:" %} 9441 ins_encode %{ 9442 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9443 emit_cmpfp3(_masm, $dst$$Register); 9444 %} 9445 ins_pipe( pipe_slow ); 9446 %} 9447 9448 // Compare into -1,0,1 in XMM and memory 9449 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9450 predicate(UseSSE>=2); 9451 match(Set dst (CmpD3 src1 (LoadD src2))); 9452 effect(KILL cr); 9453 ins_cost(275); 9454 format %{ "UCOMISD $src1, $src2\n\t" 9455 "MOV $dst, #-1\n\t" 9456 "JP,s done\n\t" 9457 "JB,s done\n\t" 9458 "SETNE $dst\n\t" 9459 "MOVZB $dst, $dst\n" 9460 "done:" %} 9461 ins_encode %{ 9462 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9463 emit_cmpfp3(_masm, $dst$$Register); 9464 %} 9465 ins_pipe( pipe_slow ); 9466 %} 9467 9468 9469 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9470 predicate (UseSSE <=1); 9471 match(Set dst (SubD dst src)); 9472 9473 format %{ "FLD $src\n\t" 9474 "DSUBp $dst,ST" %} 9475 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9476 ins_cost(150); 9477 ins_encode( Push_Reg_DPR(src), 9478 OpcP, RegOpc(dst) ); 9479 ins_pipe( fpu_reg_reg ); 9480 %} 9481 9482 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9483 predicate (UseSSE <=1); 9484 match(Set dst (RoundDouble (SubD src1 src2))); 9485 ins_cost(250); 9486 9487 format %{ "FLD $src2\n\t" 9488 "DSUB ST,$src1\n\t" 9489 "FSTP_D $dst\t# D-round" %} 9490 opcode(0xD8, 0x5); 9491 ins_encode( Push_Reg_DPR(src2), 9492 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9493 ins_pipe( fpu_mem_reg_reg ); 9494 %} 9495 9496 9497 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9498 predicate (UseSSE <=1); 9499 match(Set dst (SubD dst (LoadD src))); 9500 ins_cost(150); 9501 9502 format %{ "FLD $src\n\t" 9503 "DSUBp $dst,ST" %} 9504 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9505 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9506 OpcP, RegOpc(dst) ); 9507 ins_pipe( fpu_reg_mem ); 9508 %} 9509 9510 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9511 predicate (UseSSE<=1); 9512 match(Set dst (AbsD src)); 9513 ins_cost(100); 9514 format %{ "FABS" %} 9515 opcode(0xE1, 0xD9); 9516 ins_encode( OpcS, OpcP ); 9517 ins_pipe( fpu_reg_reg ); 9518 %} 9519 9520 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9521 predicate(UseSSE<=1); 9522 match(Set dst (NegD src)); 9523 ins_cost(100); 9524 format %{ "FCHS" %} 9525 opcode(0xE0, 0xD9); 9526 ins_encode( OpcS, OpcP ); 9527 ins_pipe( fpu_reg_reg ); 9528 %} 9529 9530 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9531 predicate(UseSSE<=1); 9532 match(Set dst (AddD dst src)); 9533 format %{ "FLD $src\n\t" 9534 "DADD $dst,ST" %} 9535 size(4); 9536 ins_cost(150); 9537 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9538 ins_encode( Push_Reg_DPR(src), 9539 OpcP, RegOpc(dst) ); 9540 ins_pipe( fpu_reg_reg ); 9541 %} 9542 9543 9544 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9545 predicate(UseSSE<=1); 9546 match(Set dst (RoundDouble (AddD src1 src2))); 9547 ins_cost(250); 9548 9549 format %{ "FLD $src2\n\t" 9550 "DADD ST,$src1\n\t" 9551 "FSTP_D $dst\t# D-round" %} 9552 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9553 ins_encode( Push_Reg_DPR(src2), 9554 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9555 ins_pipe( fpu_mem_reg_reg ); 9556 %} 9557 9558 9559 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9560 predicate(UseSSE<=1); 9561 match(Set dst (AddD dst (LoadD src))); 9562 ins_cost(150); 9563 9564 format %{ "FLD $src\n\t" 9565 "DADDp $dst,ST" %} 9566 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9567 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9568 OpcP, RegOpc(dst) ); 9569 ins_pipe( fpu_reg_mem ); 9570 %} 9571 9572 // add-to-memory 9573 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9574 predicate(UseSSE<=1); 9575 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9576 ins_cost(150); 9577 9578 format %{ "FLD_D $dst\n\t" 9579 "DADD ST,$src\n\t" 9580 "FST_D $dst" %} 9581 opcode(0xDD, 0x0); 9582 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9583 Opcode(0xD8), RegOpc(src), 9584 set_instruction_start, 9585 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9586 ins_pipe( fpu_reg_mem ); 9587 %} 9588 9589 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9590 predicate(UseSSE<=1); 9591 match(Set dst (AddD dst con)); 9592 ins_cost(125); 9593 format %{ "FLD1\n\t" 9594 "DADDp $dst,ST" %} 9595 ins_encode %{ 9596 __ fld1(); 9597 __ faddp($dst$$reg); 9598 %} 9599 ins_pipe(fpu_reg); 9600 %} 9601 9602 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9603 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9604 match(Set dst (AddD dst con)); 9605 ins_cost(200); 9606 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9607 "DADDp $dst,ST" %} 9608 ins_encode %{ 9609 __ fld_d($constantaddress($con)); 9610 __ faddp($dst$$reg); 9611 %} 9612 ins_pipe(fpu_reg_mem); 9613 %} 9614 9615 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9616 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9617 match(Set dst (RoundDouble (AddD src con))); 9618 ins_cost(200); 9619 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9620 "DADD ST,$src\n\t" 9621 "FSTP_D $dst\t# D-round" %} 9622 ins_encode %{ 9623 __ fld_d($constantaddress($con)); 9624 __ fadd($src$$reg); 9625 __ fstp_d(Address(rsp, $dst$$disp)); 9626 %} 9627 ins_pipe(fpu_mem_reg_con); 9628 %} 9629 9630 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9631 predicate(UseSSE<=1); 9632 match(Set dst (MulD dst src)); 9633 format %{ "FLD $src\n\t" 9634 "DMULp $dst,ST" %} 9635 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9636 ins_cost(150); 9637 ins_encode( Push_Reg_DPR(src), 9638 OpcP, RegOpc(dst) ); 9639 ins_pipe( 
fpu_reg_reg ); 9640 %} 9641 9642 // Strict FP instruction biases argument before multiply then 9643 // biases result to avoid double rounding of subnormals. 9644 // 9645 // scale arg1 by multiplying arg1 by 2^(-15360) 9646 // load arg2 9647 // multiply scaled arg1 by arg2 9648 // rescale product by 2^(15360) 9649 // 9650 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9651 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9652 match(Set dst (MulD dst src)); 9653 ins_cost(1); // Select this instruction for all strict FP double multiplies 9654 9655 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9656 "DMULp $dst,ST\n\t" 9657 "FLD $src\n\t" 9658 "DMULp $dst,ST\n\t" 9659 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9660 "DMULp $dst,ST\n\t" %} 9661 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9662 ins_encode( strictfp_bias1(dst), 9663 Push_Reg_DPR(src), 9664 OpcP, RegOpc(dst), 9665 strictfp_bias2(dst) ); 9666 ins_pipe( fpu_reg_reg ); 9667 %} 9668 9669 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9670 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9671 match(Set dst (MulD dst con)); 9672 ins_cost(200); 9673 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9674 "DMULp $dst,ST" %} 9675 ins_encode %{ 9676 __ fld_d($constantaddress($con)); 9677 __ fmulp($dst$$reg); 9678 %} 9679 ins_pipe(fpu_reg_mem); 9680 %} 9681 9682 9683 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9684 predicate( UseSSE<=1 ); 9685 match(Set dst (MulD dst (LoadD src))); 9686 ins_cost(200); 9687 format %{ "FLD_D $src\n\t" 9688 "DMULp $dst,ST" %} 9689 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9690 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9691 OpcP, RegOpc(dst) ); 9692 ins_pipe( fpu_reg_mem ); 9693 %} 9694 9695 // 9696 // Cisc-alternate to reg-reg multiply 9697 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9698 predicate( UseSSE<=1 ); 9699 match(Set dst (MulD src (LoadD mem))); 9700 ins_cost(250); 9701 format %{ "FLD_D $mem\n\t" 9702 "DMUL ST,$src\n\t" 9703 "FSTP_D $dst" %} 9704 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9705 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9706 OpcReg_FPR(src), 9707 Pop_Reg_DPR(dst) ); 9708 ins_pipe( fpu_reg_reg_mem ); 9709 %} 9710 9711 9712 // MACRO3 -- addDPR a mulDPR 9713 // This instruction is a '2-address' instruction in that the result goes 9714 // back to src2. This eliminates a move from the macro; possibly the 9715 // register allocator will have to add it back (and maybe not). 9716 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9717 predicate( UseSSE<=1 ); 9718 match(Set src2 (AddD (MulD src0 src1) src2)); 9719 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9720 "DMUL ST,$src1\n\t" 9721 "DADDp $src2,ST" %} 9722 ins_cost(250); 9723 opcode(0xDD); /* LoadD DD /0 */ 9724 ins_encode( Push_Reg_FPR(src0), 9725 FMul_ST_reg(src1), 9726 FAddP_reg_ST(src2) ); 9727 ins_pipe( fpu_reg_reg_reg ); 9728 %} 9729 9730 9731 // MACRO3 -- subDPR a mulDPR 9732 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9733 predicate( UseSSE<=1 ); 9734 match(Set src2 (SubD (MulD src0 src1) src2)); 9735 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9736 "DMUL ST,$src1\n\t" 9737 "DSUBRp $src2,ST" %} 9738 ins_cost(250); 9739 ins_encode( Push_Reg_FPR(src0), 9740 FMul_ST_reg(src1), 9741 Opcode(0xDE), Opc_plus(0xE0,src2)); 9742 ins_pipe( fpu_reg_reg_reg ); 9743 %} 9744 9745 9746 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9747 predicate( UseSSE<=1 ); 9748 match(Set dst (DivD dst src)); 9749 9750 format %{ "FLD $src\n\t" 9751 "FDIVp $dst,ST" %} 9752 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9753 ins_cost(150); 9754 ins_encode( Push_Reg_DPR(src), 9755 OpcP, RegOpc(dst) ); 9756 ins_pipe( fpu_reg_reg ); 9757 %} 9758 9759 // Strict FP instruction biases argument before division then 9760 // biases 
result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Fixed: this instruct previously carried TWO predicate clauses
  // (a bare "UseSSE<=1" before match and the strict-FP one after it).
  // An instruct may declare only one predicate; keep the combined
  // strict-FP form, placed before match exactly as in the sibling
  // strictfp_mulDPR_reg above.
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all strict FP double divides
               // (was "01"; plain 1 matches strictfp_mulDPR_reg)

  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "FDIVp  $dst,ST\n\t"
            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7 */
  ins_encode( strictfp_bias1(dst),   // pre-scale dividend by 2^(-15360)
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),     // FDIVp $dst,ST
              strictfp_bias2(dst) ); // rescale quotient by 2^(15360)
  ins_pipe( fpu_reg_reg );
%}

// Non-strict rounding divide: divide then round the result to a
// double-sized stack slot in one instruction.
instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD    $src1\n\t"
            "FDIV   ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Double remainder via x87 FPREM loop (FPU stack registers).
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Double remainder for SSE2: bounce operands through the stack so the
// x87 FPREM loop can be used, then move the result back to an XMM reg.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB    ESP,8\t # DMOD\n"
          "\tMOVSD  [ESP+0],$src1\n"
          "\tFLD_D  [ESP+0]\n"
          "\tMOVSD  [ESP+0],$src0\n"
          "\tFLD_D  [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD  $dst,[ESP+0]\n"
          "\tADD    ESP,8\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

// Arctangent (FPATAN) on x87 stack registers.
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA   $dst,$src" %}
  opcode(0xD9, 0xF3); // D9 F3 = FPATAN
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// Arctangent for SSE2 regs: operands pushed through the stack to x87.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA   $dst,$src" %}
  opcode(0xD9, 0xF3); // D9 F3 = FPATAN
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// Square root (FSQRT) on x87 stack registers.
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT  $dst,$src" %}
  opcode(0xFA, 0xD9); // D9 FA = FSQRT
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
//     fcompp();
//     fwait(); fnstsw_ax();
//     sahf();
//     movl(dst, unordered_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//   exit:

// P6 version of float compare, sets
condition codes in EFLAGS 9888 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9889 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9890 match(Set cr (CmpF src1 src2)); 9891 effect(KILL rax); 9892 ins_cost(150); 9893 format %{ "FLD $src1\n\t" 9894 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9895 "JNP exit\n\t" 9896 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 9897 "SAHF\n" 9898 "exit:\tNOP // avoid branch to branch" %} 9899 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9900 ins_encode( Push_Reg_DPR(src1), 9901 OpcP, RegOpc(src2), 9902 cmpF_P6_fixup ); 9903 ins_pipe( pipe_slow ); 9904 %} 9905 9906 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 9907 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9908 match(Set cr (CmpF src1 src2)); 9909 ins_cost(100); 9910 format %{ "FLD $src1\n\t" 9911 "FUCOMIP ST,$src2 // P6 instruction" %} 9912 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9913 ins_encode( Push_Reg_DPR(src1), 9914 OpcP, RegOpc(src2)); 9915 ins_pipe( pipe_slow ); 9916 %} 9917 9918 9919 // Compare & branch 9920 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9921 predicate(UseSSE == 0); 9922 match(Set cr (CmpF src1 src2)); 9923 effect(KILL rax); 9924 ins_cost(200); 9925 format %{ "FLD $src1\n\t" 9926 "FCOMp $src2\n\t" 9927 "FNSTSW AX\n\t" 9928 "TEST AX,0x400\n\t" 9929 "JZ,s flags\n\t" 9930 "MOV AH,1\t# unordered treat as LT\n" 9931 "flags:\tSAHF" %} 9932 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9933 ins_encode( Push_Reg_DPR(src1), 9934 OpcP, RegOpc(src2), 9935 fpu_flags); 9936 ins_pipe( pipe_slow ); 9937 %} 9938 9939 // Compare vs zero into -1,0,1 9940 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9941 predicate(UseSSE == 0); 9942 match(Set dst (CmpF3 src1 zero)); 9943 effect(KILL cr, KILL rax); 9944 ins_cost(280); 9945 format %{ "FTSTF $dst,$src1" %} 9946 opcode(0xE4, 0xD9); 9947 ins_encode( Push_Reg_DPR(src1), 9948 OpcS, OpcP, 
PopFPU, 9949 CmpF_Result(dst)); 9950 ins_pipe( pipe_slow ); 9951 %} 9952 9953 // Compare into -1,0,1 9954 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 9955 predicate(UseSSE == 0); 9956 match(Set dst (CmpF3 src1 src2)); 9957 effect(KILL cr, KILL rax); 9958 ins_cost(300); 9959 format %{ "FCMPF $dst,$src1,$src2" %} 9960 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9961 ins_encode( Push_Reg_DPR(src1), 9962 OpcP, RegOpc(src2), 9963 CmpF_Result(dst)); 9964 ins_pipe( pipe_slow ); 9965 %} 9966 9967 // float compare and set condition codes in EFLAGS by XMM regs 9968 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 9969 predicate(UseSSE>=1); 9970 match(Set cr (CmpF src1 src2)); 9971 ins_cost(145); 9972 format %{ "UCOMISS $src1,$src2\n\t" 9973 "JNP,s exit\n\t" 9974 "PUSHF\t# saw NaN, set CF\n\t" 9975 "AND [rsp], #0xffffff2b\n\t" 9976 "POPF\n" 9977 "exit:" %} 9978 ins_encode %{ 9979 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 9980 emit_cmpfp_fixup(_masm); 9981 %} 9982 ins_pipe( pipe_slow ); 9983 %} 9984 9985 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 9986 predicate(UseSSE>=1); 9987 match(Set cr (CmpF src1 src2)); 9988 ins_cost(100); 9989 format %{ "UCOMISS $src1,$src2" %} 9990 ins_encode %{ 9991 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 9992 %} 9993 ins_pipe( pipe_slow ); 9994 %} 9995 9996 // float compare and set condition codes in EFLAGS by XMM regs 9997 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 9998 predicate(UseSSE>=1); 9999 match(Set cr (CmpF src1 (LoadF src2))); 10000 ins_cost(165); 10001 format %{ "UCOMISS $src1,$src2\n\t" 10002 "JNP,s exit\n\t" 10003 "PUSHF\t# saw NaN, set CF\n\t" 10004 "AND [rsp], #0xffffff2b\n\t" 10005 "POPF\n" 10006 "exit:" %} 10007 ins_encode %{ 10008 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10009 emit_cmpfp_fixup(_masm); 10010 %} 10011 ins_pipe( pipe_slow ); 10012 %} 10013 10014 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory 
src2) %{ 10015 predicate(UseSSE>=1); 10016 match(Set cr (CmpF src1 (LoadF src2))); 10017 ins_cost(100); 10018 format %{ "UCOMISS $src1,$src2" %} 10019 ins_encode %{ 10020 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10021 %} 10022 ins_pipe( pipe_slow ); 10023 %} 10024 10025 // Compare into -1,0,1 in XMM 10026 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10027 predicate(UseSSE>=1); 10028 match(Set dst (CmpF3 src1 src2)); 10029 effect(KILL cr); 10030 ins_cost(255); 10031 format %{ "UCOMISS $src1, $src2\n\t" 10032 "MOV $dst, #-1\n\t" 10033 "JP,s done\n\t" 10034 "JB,s done\n\t" 10035 "SETNE $dst\n\t" 10036 "MOVZB $dst, $dst\n" 10037 "done:" %} 10038 ins_encode %{ 10039 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10040 emit_cmpfp3(_masm, $dst$$Register); 10041 %} 10042 ins_pipe( pipe_slow ); 10043 %} 10044 10045 // Compare into -1,0,1 in XMM and memory 10046 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10047 predicate(UseSSE>=1); 10048 match(Set dst (CmpF3 src1 (LoadF src2))); 10049 effect(KILL cr); 10050 ins_cost(275); 10051 format %{ "UCOMISS $src1, $src2\n\t" 10052 "MOV $dst, #-1\n\t" 10053 "JP,s done\n\t" 10054 "JB,s done\n\t" 10055 "SETNE $dst\n\t" 10056 "MOVZB $dst, $dst\n" 10057 "done:" %} 10058 ins_encode %{ 10059 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10060 emit_cmpfp3(_masm, $dst$$Register); 10061 %} 10062 ins_pipe( pipe_slow ); 10063 %} 10064 10065 // Spill to obtain 24-bit precision 10066 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10067 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10068 match(Set dst (SubF src1 src2)); 10069 10070 format %{ "FSUB $dst,$src1 - $src2" %} 10071 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10072 ins_encode( Push_Reg_FPR(src1), 10073 OpcReg_FPR(src2), 10074 Pop_Mem_FPR(dst) ); 10075 ins_pipe( fpu_mem_reg_reg ); 10076 %} 10077 // 10078 // This instruction does not round to 24-bits 10079 
// x87 float subtract, register-register, 2-address form (dst -= src).
// Only used when SSE is off and strictfp-style 24-bit rounding is not required.
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
// (result is popped to a stack slot with FSTP_S, which forces rounding
// to single precision).
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Absolute value: dst/src constrained to FPR1 so the operand is on
// the top of the x87 stack; emits D9 E1 (FABS) via OpcS then OpcP.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Negate: same TOS-only scheme as FABS; emits D9 E0 (FCHS).
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
// Add of a float constant loaded from the constant table.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP $dst" %}
  // Fixed format text: this form pops the result into a register
  // (Pop_Reg_FPR), it does not do a single-precision store, so it
  // prints FSTP (like mulFPR_reg_load1), not FSTP_S.
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits.  It is a '2-address'
// instruction in that the result goes back to src2.  This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
10388 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ 10389 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10390 match(Set src2 (AddF (MulF src0 src1) src2)); 10391 10392 format %{ "FLD $src0 ===MACRO3===\n\t" 10393 "FMUL ST,$src1\n\t" 10394 "FADDP $src2,ST" %} 10395 opcode(0xD9); /* LoadF D9 /0 */ 10396 ins_encode( Push_Reg_FPR(src0), 10397 FMul_ST_reg(src1), 10398 FAddP_reg_ST(src2) ); 10399 ins_pipe( fpu_reg_reg_reg ); 10400 %} 10401 10402 // MACRO4 -- divFPR subFPR 10403 // This instruction does not round to 24-bits 10404 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ 10405 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10406 match(Set dst (DivF (SubF src2 src1) src3)); 10407 10408 format %{ "FLD $src2 ===MACRO4===\n\t" 10409 "FSUB ST,$src1\n\t" 10410 "FDIV ST,$src3\n\t" 10411 "FSTP $dst" %} 10412 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10413 ins_encode( Push_Reg_FPR(src2), 10414 subFPR_divFPR_encode(src1,src3), 10415 Pop_Reg_FPR(dst) ); 10416 ins_pipe( fpu_reg_reg_reg_reg ); 10417 %} 10418 10419 // Spill to obtain 24-bit precision 10420 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10421 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10422 match(Set dst (DivF src1 src2)); 10423 10424 format %{ "FDIV $dst,$src1,$src2" %} 10425 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 10426 ins_encode( Push_Reg_FPR(src1), 10427 OpcReg_FPR(src2), 10428 Pop_Mem_FPR(dst) ); 10429 ins_pipe( fpu_mem_reg_reg ); 10430 %} 10431 // 10432 // This instruction does not round to 24-bits 10433 instruct divFPR_reg(regFPR dst, regFPR src) %{ 10434 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10435 match(Set dst (DivF dst src)); 10436 10437 format %{ "FDIV $dst,$src" %} 10438 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10439 ins_encode( Push_Reg_FPR(src), 10440 OpcP, RegOpc(dst) ); 10441 ins_pipe( fpu_reg_reg ); 10442 %} 10443 10444 10445 
// Spill to obtain 24-bit precision 10446 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10447 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10448 match(Set dst (ModF src1 src2)); 10449 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10450 10451 format %{ "FMOD $dst,$src1,$src2" %} 10452 ins_encode( Push_Reg_Mod_DPR(src1, src2), 10453 emitModDPR(), 10454 Push_Result_Mod_DPR(src2), 10455 Pop_Mem_FPR(dst)); 10456 ins_pipe( pipe_slow ); 10457 %} 10458 // 10459 // This instruction does not round to 24-bits 10460 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ 10461 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10462 match(Set dst (ModF dst src)); 10463 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10464 10465 format %{ "FMOD $dst,$src" %} 10466 ins_encode(Push_Reg_Mod_DPR(dst, src), 10467 emitModDPR(), 10468 Push_Result_Mod_DPR(src), 10469 Pop_Reg_FPR(dst)); 10470 ins_pipe( pipe_slow ); 10471 %} 10472 10473 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ 10474 predicate(UseSSE>=1); 10475 match(Set dst (ModF src0 src1)); 10476 effect(KILL rax, KILL cr); 10477 format %{ "SUB ESP,4\t # FMOD\n" 10478 "\tMOVSS [ESP+0],$src1\n" 10479 "\tFLD_S [ESP+0]\n" 10480 "\tMOVSS [ESP+0],$src0\n" 10481 "\tFLD_S [ESP+0]\n" 10482 "loop:\tFPREM\n" 10483 "\tFWAIT\n" 10484 "\tFNSTSW AX\n" 10485 "\tSAHF\n" 10486 "\tJP loop\n" 10487 "\tFSTP_S [ESP+0]\n" 10488 "\tMOVSS $dst,[ESP+0]\n" 10489 "\tADD ESP,4\n" 10490 "\tFSTP ST0\t # Restore FPU Stack" 10491 %} 10492 ins_cost(250); 10493 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); 10494 ins_pipe( pipe_slow ); 10495 %} 10496 10497 10498 //----------Arithmetic Conversion Instructions--------------------------------- 10499 // The conversions operations are all Alpha sorted. Please keep it that way! 
// Round a float in an x87 register by storing it to a stack slot
// (FST_S truncates to single precision).
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round a double in an x87 register by storing it to a stack slot.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
// (ConvD2F with pure x87: expands to the round-through-memory instruct above).
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// x87 double -> XMM float: store single to the stack, reload with MOVSS.
// FPR1L (top-of-stack) gets a non-popping FST_S; other stack slots are
// loaded to TOS first and popped.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// x87 float -> x87 double: a register-to-register pop/copy.
// NOTE(review): the format prints FST_S, but the encoding is
// Pop_Reg_Reg_DPR (register pop, no single-precision store) —
// presumably the text should read FSTP; confirm before changing.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// XMM float -> x87 double: spill the XMM value, FLD_S it, and leave the
// widened result on the x87 stack (popped to dst with FSTP).
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
// x87 double -> int: switch the FPU to truncating rounding, FISTP to the
// stack, restore the rounding mode, then compare against 0x80000000 — the
// value the hardware stores on overflow/NaN — and take the slow call to
// d2i_wrapper in that case.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
// SSE2 version: CVTTSD2SI does the truncation; 0x80000000 in the result
// signals overflow/NaN and routes through d2i_wrapper with the original
// value reloaded onto the x87 stack.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 double -> long: same trunc-mode FISTP scheme; the long lands in
// EDX:EAX (eADXRegL), and EDX==0x80000000 with EAX==0 marks the
// overflow/NaN sentinel that goes to d2l_wrapper.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
10760 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 10761 predicate(UseSSE>=1); 10762 match(Set dst (ConvF2I src)); 10763 effect( KILL tmp, KILL cr ); 10764 format %{ "CVTTSS2SI $dst, $src\n\t" 10765 "CMP $dst,0x80000000\n\t" 10766 "JNE,s fast\n\t" 10767 "SUB ESP, 4\n\t" 10768 "MOVSS [ESP], $src\n\t" 10769 "FLD [ESP]\n\t" 10770 "ADD ESP, 4\n\t" 10771 "CALL d2i_wrapper\n" 10772 "fast:" %} 10773 ins_encode %{ 10774 Label fast; 10775 __ cvttss2sil($dst$$Register, $src$$XMMRegister); 10776 __ cmpl($dst$$Register, 0x80000000); 10777 __ jccb(Assembler::notEqual, fast); 10778 __ subptr(rsp, 4); 10779 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10780 __ fld_s(Address(rsp, 0)); 10781 __ addptr(rsp, 4); 10782 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); 10783 __ bind(fast); 10784 %} 10785 ins_pipe( pipe_slow ); 10786 %} 10787 10788 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ 10789 predicate(UseSSE==0); 10790 match(Set dst (ConvF2L src)); 10791 effect( KILL cr ); 10792 format %{ "FLD $src\t# Convert float to long\n\t" 10793 "FLDCW trunc mode\n\t" 10794 "SUB ESP,8\n\t" 10795 "FISTp [ESP + #0]\n\t" 10796 "FLDCW std/24-bit mode\n\t" 10797 "POP EAX\n\t" 10798 "POP EDX\n\t" 10799 "CMP EDX,0x80000000\n\t" 10800 "JNE,s fast\n\t" 10801 "TEST EAX,EAX\n\t" 10802 "JNE,s fast\n\t" 10803 "FLD $src\n\t" 10804 "CALL d2l_wrapper\n" 10805 "fast:" %} 10806 // DPR2L_encoding works for FPR2L 10807 ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); 10808 ins_pipe( pipe_slow ); 10809 %} 10810 10811 // XMM lacks a float/double->long conversion, so use the old FPU stack. 
// SSE float -> long: spill the XMM value, run the x87 trunc-mode FISTP
// sequence, and fall back to d2l_wrapper on the EDX:EAX sentinel
// (0x80000000:0) produced by overflow/NaN.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// int (stack slot) -> x87 double via FILD; exact, no rounding needed.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Cisc form: convert directly from memory, subsuming the LoadI.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// i2d staying entirely in XMM (UseXmmI2D): MOVD then packed convert.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Cisc form of convI2DPR_reg: FILD straight from memory.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// i2f when the input is known to be a byte (matched via the AndI-with-255
// pattern in the predicate): fits exactly in 24 bits, so no rounding spill.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
// (cisc form: FILD straight from memory).
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// i2f staying entirely in XMM (UseXmmI2F): MOVD then packed convert.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy to both halves, then SAR the high half.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
// NOTE(review): the format string ends with a stray "\n\t" after the
// last line — cosmetic only (affects PrintOptoAssembly output).
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// long -> x87 double: push both halves, FILD the 64-bit int, round to
// a stack slot.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> XMM double: FILD on the x87 unit, bounce through the stack
// into an XMM register (no direct SSE2 l2d instruction on ia32).
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// long -> XMM float: same bounce-through-stack scheme; the FSTP_S
// store performs the rounding to single precision.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// long -> float via x87, result rounded into a stack slot.
// NOTE(review): no predicate here, unlike convL2F_reg above — it can
// match regardless of UseSSE; presumably selection between the two is
// decided by cost/operand kinds — confirm.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> int: just take the low half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// MoveF2I / MoveI2F / MoveD2L / MoveL2D: raw bit moves between FP and
// integer views of the same 32/64-bit value (Float.floatToRawIntBits
// and friends).  Variants below cover stack slots, x87 registers, XMM
// registers, and GPRs depending on UseSSE level.

instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// XMM double -> GPR pair without touching memory: MOVD the low word,
// PSHUFLW to rotate the high word down, MOVD again (needs a TEMP xmm).
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 &&
UseXmmLoadAndClearUpper); 11287 match(Set dst (MoveL2D src)); 11288 effect(DEF dst, USE src); 11289 11290 ins_cost(95); 11291 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11292 ins_encode %{ 11293 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11294 %} 11295 ins_pipe( pipe_slow ); 11296 %} 11297 11298 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11299 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11300 match(Set dst (MoveL2D src)); 11301 effect(DEF dst, USE src); 11302 11303 ins_cost(95); 11304 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11305 ins_encode %{ 11306 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11307 %} 11308 ins_pipe( pipe_slow ); 11309 %} 11310 11311 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11312 predicate(UseSSE>=2); 11313 match(Set dst (MoveL2D src)); 11314 effect(TEMP dst, USE src, TEMP tmp); 11315 ins_cost(85); 11316 format %{ "MOVD $dst,$src.lo\n\t" 11317 "MOVD $tmp,$src.hi\n\t" 11318 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11319 ins_encode %{ 11320 __ movdl($dst$$XMMRegister, $src$$Register); 11321 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11322 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11323 %} 11324 ins_pipe( pipe_slow ); 11325 %} 11326 11327 11328 // ======================================================================= 11329 // fast clearing of an array 11330 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11331 predicate(!((ClearArrayNode*)n)->is_large()); 11332 match(Set dummy (ClearArray cnt base)); 11333 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11334 11335 format %{ $$template 11336 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11337 $$emit$$"CMP InitArrayShortSize,rcx\n\t" 11338 $$emit$$"JG LARGE\n\t" 11339 $$emit$$"SHL ECX, 1\n\t" 11340 $$emit$$"DEC ECX\n\t" 11341 $$emit$$"JS DONE\t# Zero length\n\t" 11342 $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" 11343 
$$emit$$"DEC ECX\n\t" 11344 $$emit$$"JGE LOOP\n\t" 11345 $$emit$$"JMP DONE\n\t" 11346 $$emit$$"# LARGE:\n\t" 11347 if (UseFastStosb) { 11348 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11349 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11350 } else { 11351 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11352 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11353 } 11354 $$emit$$"# DONE" 11355 %} 11356 ins_encode %{ 11357 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false); 11358 %} 11359 ins_pipe( pipe_slow ); 11360 %} 11361 11362 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11363 predicate(((ClearArrayNode*)n)->is_large()); 11364 match(Set dummy (ClearArray cnt base)); 11365 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11366 format %{ $$template 11367 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11368 if (UseFastStosb) { 11369 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11370 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11371 } else { 11372 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11373 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11374 } 11375 $$emit$$"# DONE" 11376 %} 11377 ins_encode %{ 11378 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true); 11379 %} 11380 ins_pipe( pipe_slow ); 11381 %} 11382 11383 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11384 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11385 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11386 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11387 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11388 11389 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11390 ins_encode %{ 11391 __ string_compare($str1$$Register, $str2$$Register, 11392 
$cnt1$$Register, $cnt2$$Register, $result$$Register, 11393 $tmp1$$XMMRegister, StrIntrinsicNode::LL); 11394 %} 11395 ins_pipe( pipe_slow ); 11396 %} 11397 11398 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11399 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11400 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11401 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11402 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11403 11404 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11405 ins_encode %{ 11406 __ string_compare($str1$$Register, $str2$$Register, 11407 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11408 $tmp1$$XMMRegister, StrIntrinsicNode::UU); 11409 %} 11410 ins_pipe( pipe_slow ); 11411 %} 11412 11413 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11414 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11415 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11416 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11417 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11418 11419 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11420 ins_encode %{ 11421 __ string_compare($str1$$Register, $str2$$Register, 11422 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11423 $tmp1$$XMMRegister, StrIntrinsicNode::LU); 11424 %} 11425 ins_pipe( pipe_slow ); 11426 %} 11427 11428 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11429 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11430 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); 11431 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11432 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11433 11434 format %{ "String Compare 
byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11435 ins_encode %{ 11436 __ string_compare($str2$$Register, $str1$$Register, 11437 $cnt2$$Register, $cnt1$$Register, $result$$Register, 11438 $tmp1$$XMMRegister, StrIntrinsicNode::UL); 11439 %} 11440 ins_pipe( pipe_slow ); 11441 %} 11442 11443 // fast string equals 11444 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11445 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11446 match(Set result (StrEquals (Binary str1 str2) cnt)); 11447 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11448 11449 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11450 ins_encode %{ 11451 __ arrays_equals(false, $str1$$Register, $str2$$Register, 11452 $cnt$$Register, $result$$Register, $tmp3$$Register, 11453 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */); 11454 %} 11455 11456 ins_pipe( pipe_slow ); 11457 %} 11458 11459 // fast search of substring with known size. 11460 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11461 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11462 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11463 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11464 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11465 11466 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11467 ins_encode %{ 11468 int icnt2 = (int)$int_cnt2$$constant; 11469 if (icnt2 >= 16) { 11470 // IndexOf for constant substrings with size >= 16 elements 11471 // which don't need to be loaded through stack. 
11472 __ string_indexofC8($str1$$Register, $str2$$Register, 11473 $cnt1$$Register, $cnt2$$Register, 11474 icnt2, $result$$Register, 11475 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11476 } else { 11477 // Small strings are loaded through stack if they cross page boundary. 11478 __ string_indexof($str1$$Register, $str2$$Register, 11479 $cnt1$$Register, $cnt2$$Register, 11480 icnt2, $result$$Register, 11481 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11482 } 11483 %} 11484 ins_pipe( pipe_slow ); 11485 %} 11486 11487 // fast search of substring with known size. 11488 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11489 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11490 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11491 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11492 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11493 11494 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11495 ins_encode %{ 11496 int icnt2 = (int)$int_cnt2$$constant; 11497 if (icnt2 >= 8) { 11498 // IndexOf for constant substrings with size >= 8 elements 11499 // which don't need to be loaded through stack. 11500 __ string_indexofC8($str1$$Register, $str2$$Register, 11501 $cnt1$$Register, $cnt2$$Register, 11502 icnt2, $result$$Register, 11503 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11504 } else { 11505 // Small strings are loaded through stack if they cross page boundary. 11506 __ string_indexof($str1$$Register, $str2$$Register, 11507 $cnt1$$Register, $cnt2$$Register, 11508 icnt2, $result$$Register, 11509 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11510 } 11511 %} 11512 ins_pipe( pipe_slow ); 11513 %} 11514 11515 // fast search of substring with known size. 
11516 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11517 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11518 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11519 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11520 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11521 11522 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11523 ins_encode %{ 11524 int icnt2 = (int)$int_cnt2$$constant; 11525 if (icnt2 >= 8) { 11526 // IndexOf for constant substrings with size >= 8 elements 11527 // which don't need to be loaded through stack. 11528 __ string_indexofC8($str1$$Register, $str2$$Register, 11529 $cnt1$$Register, $cnt2$$Register, 11530 icnt2, $result$$Register, 11531 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11532 } else { 11533 // Small strings are loaded through stack if they cross page boundary. 
11534 __ string_indexof($str1$$Register, $str2$$Register, 11535 $cnt1$$Register, $cnt2$$Register, 11536 icnt2, $result$$Register, 11537 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11538 } 11539 %} 11540 ins_pipe( pipe_slow ); 11541 %} 11542 11543 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11544 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11545 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11546 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11547 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11548 11549 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11550 ins_encode %{ 11551 __ string_indexof($str1$$Register, $str2$$Register, 11552 $cnt1$$Register, $cnt2$$Register, 11553 (-1), $result$$Register, 11554 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11555 %} 11556 ins_pipe( pipe_slow ); 11557 %} 11558 11559 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11560 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11561 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11562 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11563 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11564 11565 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11566 ins_encode %{ 11567 __ string_indexof($str1$$Register, $str2$$Register, 11568 $cnt1$$Register, $cnt2$$Register, 11569 (-1), $result$$Register, 11570 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11571 %} 11572 ins_pipe( pipe_slow ); 11573 %} 11574 11575 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11576 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11577 
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11578 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11579 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11580 11581 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11582 ins_encode %{ 11583 __ string_indexof($str1$$Register, $str2$$Register, 11584 $cnt1$$Register, $cnt2$$Register, 11585 (-1), $result$$Register, 11586 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11587 %} 11588 ins_pipe( pipe_slow ); 11589 %} 11590 11591 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, 11592 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ 11593 predicate(UseSSE42Intrinsics); 11594 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); 11595 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); 11596 format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} 11597 ins_encode %{ 11598 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 11599 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 11600 %} 11601 ins_pipe( pipe_slow ); 11602 %} 11603 11604 // fast array equals 11605 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11606 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11607 %{ 11608 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 11609 match(Set result (AryEq ary1 ary2)); 11610 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11611 //ins_cost(300); 11612 11613 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11614 ins_encode %{ 11615 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11616 $tmp3$$Register, $result$$Register, $tmp4$$Register, 
11617 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */); 11618 %} 11619 ins_pipe( pipe_slow ); 11620 %} 11621 11622 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11623 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11624 %{ 11625 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 11626 match(Set result (AryEq ary1 ary2)); 11627 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11628 //ins_cost(300); 11629 11630 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11631 ins_encode %{ 11632 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11633 $tmp3$$Register, $result$$Register, $tmp4$$Register, 11634 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */); 11635 %} 11636 ins_pipe( pipe_slow ); 11637 %} 11638 11639 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result, 11640 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) 11641 %{ 11642 match(Set result (HasNegatives ary1 len)); 11643 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 11644 11645 format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11646 ins_encode %{ 11647 __ has_negatives($ary1$$Register, $len$$Register, 11648 $result$$Register, $tmp3$$Register, 11649 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11650 %} 11651 ins_pipe( pipe_slow ); 11652 %} 11653 11654 // fast char[] to byte[] compression 11655 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11656 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11657 match(Set result (StrCompressedCopy src (Binary dst len))); 11658 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11659 11660 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 11661 ins_encode %{ 11662 __ 
char_array_compress($src$$Register, $dst$$Register, $len$$Register, 11663 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11664 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11665 %} 11666 ins_pipe( pipe_slow ); 11667 %} 11668 11669 // fast byte[] to char[] inflation 11670 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 11671 regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ 11672 match(Set dummy (StrInflatedCopy src (Binary dst len))); 11673 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 11674 11675 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 11676 ins_encode %{ 11677 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, 11678 $tmp1$$XMMRegister, $tmp2$$Register); 11679 %} 11680 ins_pipe( pipe_slow ); 11681 %} 11682 11683 // encode char[] to byte[] in ISO_8859_1 11684 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 11685 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11686 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11687 match(Set result (EncodeISOArray src (Binary dst len))); 11688 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11689 11690 format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 11691 ins_encode %{ 11692 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 11693 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11694 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11695 %} 11696 ins_pipe( pipe_slow ); 11697 %} 11698 11699 11700 //----------Control Flow Instructions------------------------------------------ 11701 // Signed compare Instructions 11702 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ 11703 match(Set cr (CmpI op1 op2)); 11704 effect( DEF cr, USE op1, USE op2 ); 11705 format %{ "CMP $op1,$op2" %} 11706 opcode(0x3B); /* Opcode 3B /r */ 11707 
  // (continuation of compI_eReg: register-register CMP encoding)
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Signed compare of register against an immediate; Con8or32 picks the
// short (sign-extended 8-bit) or long (32-bit) immediate form.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
// Compares a register directly against a memory operand (folds the LoadI).
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare-to-zero via TEST reg,reg — shorter than CMP with a zero immediate.
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Matches (AndI src con) compared to zero as a single TEST with immediate,
// so the AND result never needs a register.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Same AND-with-zero-compare idiom, with the mask coming from memory.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
// Unsigned register-register compare; identical encoding to the signed
// form but defines eFlagsRegU so only unsigned branch conditions apply.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Unsigned compare of register against an immediate.
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
// Unsigned compare against a memory operand (folds the LoadI).
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare-to-zero via TEST reg,reg.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Pointer compare against an immediate pointer constant.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
// Pointer compare against a memory operand (folds the LoadP).
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
// The predicate inspects the loaded pointer's type: relocInfo::none means
// it is not a relocatable (oop) constant, i.e. a raw pointer.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// Null-check a pointer loaded from memory: TEST m32,0xFFFFFFFF sets
// ZF iff the loaded pointer is zero, without needing a register.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7); /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
// Two-address form: dst is both input and output; flags are clobbered by
// the compare inside the min_enc encoding class.
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
// Integer maximum: dst = max(dst, src).  Sequence is emitted by the max_enc
// encoding class; clobbers the condition flags.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
//
// Computes limit = init + stride * ((limit - init + stride - 1) / stride)
// using a 64-bit intermediate (EAX:EDX) so the subtraction cannot overflow.
// The stride is a compile-time constant and is never +/-1 (asserted below);
// those cases are handled elsewhere.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // (Removed an unused local: 'int m1 = (strd > 0) ? 1 : -1;' -- it was
    // never referenced; the sign handling below works on strd directly.)
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      // Negative stride: add (stride + 1), then negate the 64-bit value so
      // the division below operates on a positive dividend and divisor.
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Unordered-or-equal style test: the parity flag must be consulted as well,
// so eq/ne each expand to a two-branch sequence.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
// array for an instance of the superklass.  Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()).  Return
// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
// Slow-path subtype check: scan the secondary-supers array with REPNE SCASD.
// Fixed register operands (EDI/ESI/EAX/ECX) match the enc_PartialSubtypeCheck
// encoding class; result register EDI is zero on a hit, non-zero on a miss.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant used when only the flags result of the check is consumed (compared
// against the null constant); EDI need not be normalized, so the XOR is
// suppressed via opcode(0x0).
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset.  These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching.  Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short variant of jmpConUCF2: eq/ne each need a parity branch as well, so
// the worst-case size is two short branches (4 bytes).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers.  Comparing such values efficiently
// is tricky.  The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code.  The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf.  One fix is to not match both
// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
// Materializes -1/0/+1 in an integer register from a three-way long compare:
// compare high words first (signed), then low words (unsigned) to break ties.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // High halves: signed comparison decides unless equal.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // Low halves: unsigned comparison breaks the tie.
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// A long is negative iff its high word is negative, so testing the high
// word's sign bit suffices.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// CMP low words then SBB the high words into a temp: the resulting sign/
// overflow flags are those of the full 64-bit subtraction (valid for LT/GE).
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the BoolTest disjunction below is parenthesized so that the UseSSE
// guard applies to BOTH the lt and ge arms.  Without the parentheses,
// '&&' binds tighter than '||' and the predicate degenerated to
// (UseSSE<=1 && lt) || ge, letting this FPU variant match 'ge' CMoves
// regardless of the UseSSE setting.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 version; parenthesized as above)
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (FPU version; parenthesized as above)
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE version; parenthesized as above)
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// A long equals zero iff the OR of its two halves is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Compare low words; the high-word compare is skipped when the lows already
// differ, hence the 200+300 cost annotation.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// CMOVE a long (two CMOVcc instructions, low then high word) off a long
// EQ/NE compare manifested in the normal flags.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant of the above.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// CMOVE an int off a long EQ/NE compare manifested in the normal flags.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source variant of the above.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// CMOVE a pointer off a long EQ/NE compare manifested in the normal flags.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the BoolTest disjunction below is parenthesized so that the UseSSE
// guard applies to BOTH the eq and ne arms.  Without the parentheses,
// '&&' binds tighter than '||' and the predicate degenerated to
// (UseSSE<=1 && eq) || ne, letting this FPU variant match 'ne' CMoves
// regardless of the UseSSE setting.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 version; parenthesized as above)
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (FPU version; parenthesized as above)
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE version; parenthesized as above)
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
// Uses the commuting cmpOp operand since LEGT flags were computed with
// swapped operands.
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant of the above.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// CMOVE an int on a long LE/GT compare, register-register form.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);   // CMOVcc is 0F 40+cc /r
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// CMOVE an int on a long LE/GT compare, register-memory form.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// CMOVE a pointer on a long LE/GT compare, register-register form.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the BoolTest disjunction below is parenthesized so that the UseSSE
// guard applies to BOTH test kinds.  Without the parentheses, '&&' binds
// tighter than '||' and the 'gt' arm escaped the UseSSE check, allowing the
// x87 rule to match even when the SSE rule below was the intended one.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form; parenthesized as above)
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// CMOVE a float on a long LE/GT compare, x87 form (no SSE).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


// CMOVE a float on a long LE/GT compare, SSE form.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
// Dynamic (virtual/interface) call through the inline cache; EAX is
// pre-loaded with the cached-oop sentinel by Java_Dynamic_Call.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that does not touch the FPU; no float-stack bookkeeping needed.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // EDX is used only as a scratch register to discard the return address.
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast-lock with Restricted Transactional Memory support; only selected
// when the compilation uses RTM (see predicate).
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast-lock; the RTM-specific arguments are passed as noreg/NULL.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or. By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$. Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic. It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Replace a load that immediately follows a store of the same value to the
// same location with another copy of the store (i.e. drop the redundant load).
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.