//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the
// prolog/epilog code, so it is No-Save from the allocator's point of view
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
// The extra 128 bits of slack exist because double_quadword rounds the
// passed address DOWN to a 16-byte boundary, so each pool slot below may
// start up to 8 bytes before its nominal &fp_signmask_pool[i*2] address.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// NOTE: these depend on fp_signmask_pool being defined (and thus
// zero-initialized storage allocated) above; the initializers write
// the mask values into the aligned slots and cache the aligned pointers.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of extra bytes emitted immediately before a call instruction:
// a 6-byte fldcw when the method runs in 24-bit FP mode, and a 3-byte
// vzeroupper when it uses wide (>16 byte) vectors.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
302 int MachCallStaticJavaNode::ret_addr_offset() { 303 return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points 304 } 305 306 int MachCallDynamicJavaNode::ret_addr_offset() { 307 return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points 308 } 309 310 static int sizeof_FFree_Float_Stack_All = -1; 311 312 int MachCallRuntimeNode::ret_addr_offset() { 313 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already"); 314 return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size(); 315 } 316 317 // Indicate if the safepoint node needs the polling page as an input. 318 // Since x86 does have absolute addressing, it doesn't. 319 bool SafePointNode::needs_polling_address_input() { 320 return false; 321 } 322 323 // 324 // Compute padding required for nodes which need alignment 325 // 326 327 // The address of the call instruction needs to be 4-byte aligned to 328 // ensure that it does not span a cache line so that it can be patched. 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const { 330 current_offset += pre_call_resets_size(); // skip fldcw, if any 331 current_offset += 1; // skip call opcode byte 332 return round_to(current_offset, alignment_required()) - current_offset; 333 } 334 335 // The address of the call instruction needs to be 4-byte aligned to 336 // ensure that it does not span a cache line so that it can be patched. 
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;                       // skip MOV instruction
  current_offset += 1;                       // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Assemble a ModRM byte from its three fields (mod, reg/opcode, r/m).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition code OR-ed in.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  // Only non-null, non-sentinel oop immediates are checked here.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + ModRM/SIB addressing [ESP+disp], choosing the short
// 8-bit displacement form when disp fits in a signed byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );                   // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );    // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d8 (cbuf, disp);                        // Displacement // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );    // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, disp);                        // Displacement // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Encode a register/memory operand: ModRM (+ optional SIB) + displacement,
// choosing the shortest legal encoding. index == 0x4 means "no index";
// base == -1 means an absolute 32-bit address.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) {     // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Register-to-register move (MOV r32, r/m32); emits nothing when source
// and destination encodings are identical.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Normalize EFLAGS after a comiss/ucomiss so that an unordered (NaN)
// compare reads as 'less than'.
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst':
// -1 for less-than or unordered, 0 for equal, 1 for greater-than.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);  // unordered (NaN) stays -1
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog that MachPrologNode::emit produces; keep the two
// in sync when changing either.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  // verified_entry builds the whole frame: bang, push/save EBP, SUB ESP.
  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog that MachEpilogNode::emit produces.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize (imm32 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize (imm8 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // one-byte POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // TEST of the polling page; the relocation lets the VM find the poll.
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  size += 64; // added to support ReservedStackAccess
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Spill-copy register classes: which kind of move a register needs.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (cbuf != NULL), format (!do_size) or size a load/store between a
// register and an [ESP + offset] stack slot; returns accumulated size.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else {                   // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
// Emit/print/size an XMM<->stack spill move of a float (MOVSS) or double
// (MOVSD) at [ESP+offset].  Follows the tri-modal cbuf/do_size protocol.
// A double move is recognized by reg_lo/reg_hi being an adjacent pair.
// Returns the accumulated instruction size in bytes, accounting for the
// extra 2-byte EVEX prefix and compressed disp8 when UseAVX > 2.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    // Adjacent pair => 64-bit (double) operand for EVEX displacement scaling.
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    // EVEX can compress the displacement into disp8*N form.
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// Emit/print/size an XMM-to-XMM register move (float or double; the double
// case is recognized by adjacent src/dst register pairs).
// Returns the accumulated size; the instruction is 3-6 bytes depending on
// the SSE/VEX/EVEX prefix actually used (see the sz computation below).
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Emit/print/size a 32-bit GPR -> XMM move (MOVD).  Used for int-to-xmm
// spill copies; 64-bit pairs are rejected by the caller's assert.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  // MOVD is 4 bytes with SSE/VEX prefixes, 6 with EVEX.
  return (UseAVX> 2) ? 6 : 4;
}


// Emit/print/size a 32-bit XMM -> GPR move (MOVD), the reverse of
// impl_movgpr2x_helper above.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  // MOVD is 4 bytes with SSE/VEX prefixes, 6 with EVEX.
  return (UseAVX> 2) ? 6 : 4;
}

// Emit/print/size an integer register-to-register MOV (opcode 0x8B).
// Always exactly 2 bytes: opcode + ModRM.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Emit/print/size an x87 float/double store to [ESP+offset].  If the value
// is not already in ST(0) it is first FLD'd to the top of stack and stored
// with a popping FSTP; otherwise a non-popping FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // The ModRM /reg field selects FSTP vs FST; EBX_num/EDX_num encode 3 and 2.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
956 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 957 int src_hi, int dst_hi, uint ireg, outputStream* st); 958 959 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 960 int stack_offset, int reg, uint ireg, outputStream* st); 961 962 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 963 int dst_offset, uint ireg, outputStream* st) { 964 int calc_size = 0; 965 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 966 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 967 switch (ireg) { 968 case Op_VecS: 969 calc_size = 3+src_offset_size + 3+dst_offset_size; 970 break; 971 case Op_VecD: 972 calc_size = 3+src_offset_size + 3+dst_offset_size; 973 src_offset += 4; 974 dst_offset += 4; 975 src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 976 dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 977 calc_size += 3+src_offset_size + 3+dst_offset_size; 978 break; 979 case Op_VecX: 980 case Op_VecY: 981 case Op_VecZ: 982 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 983 break; 984 default: 985 ShouldNotReachHere(); 986 } 987 if (cbuf) { 988 MacroAssembler _masm(cbuf); 989 int offset = __ offset(); 990 switch (ireg) { 991 case Op_VecS: 992 __ pushl(Address(rsp, src_offset)); 993 __ popl (Address(rsp, dst_offset)); 994 break; 995 case Op_VecD: 996 __ pushl(Address(rsp, src_offset)); 997 __ popl (Address(rsp, dst_offset)); 998 __ pushl(Address(rsp, src_offset+4)); 999 __ popl (Address(rsp, dst_offset+4)); 1000 break; 1001 case Op_VecX: 1002 __ movdqu(Address(rsp, -16), xmm0); 1003 __ movdqu(xmm0, Address(rsp, src_offset)); 1004 __ movdqu(Address(rsp, dst_offset), xmm0); 1005 __ movdqu(xmm0, Address(rsp, -16)); 1006 break; 1007 case Op_VecY: 1008 __ vmovdqu(Address(rsp, -32), xmm0); 1009 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1010 __ vmovdqu(Address(rsp, dst_offset), xmm0); 
1011 __ vmovdqu(xmm0, Address(rsp, -32)); 1012 break; 1013 case Op_VecZ: 1014 __ evmovdqul(Address(rsp, -64), xmm0, 2); 1015 __ evmovdqul(xmm0, Address(rsp, src_offset), 2); 1016 __ evmovdqul(Address(rsp, dst_offset), xmm0, 2); 1017 __ evmovdqul(xmm0, Address(rsp, -64), 2); 1018 break; 1019 default: 1020 ShouldNotReachHere(); 1021 } 1022 int size = __ offset() - offset; 1023 assert(size == calc_size, "incorrect size calculattion"); 1024 return size; 1025 #ifndef PRODUCT 1026 } else if (!do_size) { 1027 switch (ireg) { 1028 case Op_VecS: 1029 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1030 "popl [rsp + #%d]", 1031 src_offset, dst_offset); 1032 break; 1033 case Op_VecD: 1034 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1035 "popq [rsp + #%d]\n\t" 1036 "pushl [rsp + #%d]\n\t" 1037 "popq [rsp + #%d]", 1038 src_offset, dst_offset, src_offset+4, dst_offset+4); 1039 break; 1040 case Op_VecX: 1041 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1042 "movdqu xmm0, [rsp + #%d]\n\t" 1043 "movdqu [rsp + #%d], xmm0\n\t" 1044 "movdqu xmm0, [rsp - #16]", 1045 src_offset, dst_offset); 1046 break; 1047 case Op_VecY: 1048 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1049 "vmovdqu xmm0, [rsp + #%d]\n\t" 1050 "vmovdqu [rsp + #%d], xmm0\n\t" 1051 "vmovdqu xmm0, [rsp - #32]", 1052 src_offset, dst_offset); 1053 break; 1054 case Op_VecZ: 1055 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1056 "vmovdqu xmm0, [rsp + #%d]\n\t" 1057 "vmovdqu [rsp + #%d], xmm0\n\t" 1058 "vmovdqu xmm0, [rsp - #64]", 1059 src_offset, dst_offset); 1060 break; 1061 default: 1062 ShouldNotReachHere(); 1063 } 1064 #endif 1065 } 1066 return calc_size; 1067 } 1068 1069 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1070 // Get registers to move 1071 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1072 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 
1073 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1074 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1075 1076 enum RC src_second_rc = rc_class(src_second); 1077 enum RC src_first_rc = rc_class(src_first); 1078 enum RC dst_second_rc = rc_class(dst_second); 1079 enum RC dst_first_rc = rc_class(dst_first); 1080 1081 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1082 1083 // Generate spill code! 1084 int size = 0; 1085 1086 if( src_first == dst_first && src_second == dst_second ) 1087 return size; // Self copy, no move 1088 1089 if (bottom_type()->isa_vect() != NULL) { 1090 uint ireg = ideal_reg(); 1091 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1092 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1093 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1094 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1095 // mem -> mem 1096 int src_offset = ra_->reg2offset(src_first); 1097 int dst_offset = ra_->reg2offset(dst_first); 1098 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1099 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1100 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1101 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1102 int stack_offset = ra_->reg2offset(dst_first); 1103 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1104 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1105 int stack_offset = ra_->reg2offset(src_first); 1106 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1107 } else { 1108 ShouldNotReachHere(); 1109 } 1110 } 1111 1112 // -------------------------------------- 1113 // Check for mem-mem move. push/pop to move. 
1114 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1115 if( src_second == dst_first ) { // overlapping stack copy ranges 1116 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1117 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1118 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1119 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1120 } 1121 // move low bits 1122 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1123 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1124 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1125 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1126 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1127 } 1128 return size; 1129 } 1130 1131 // -------------------------------------- 1132 // Check for integer reg-reg copy 1133 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1134 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1135 1136 // Check for integer store 1137 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1138 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1139 1140 // Check for integer load 1141 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1142 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1143 1144 // Check for integer reg-xmm reg copy 1145 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1146 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1147 "no 64 bit integer-float reg moves" ); 1148 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1149 } 1150 // -------------------------------------- 1151 // Check for float reg-reg copy 1152 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1153 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1154 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1155 if( cbuf ) { 1156 1157 // Note the mucking with the register encode to compensate for the 0/1 1158 // indexing issue mentioned in a comment in the reg_def sections 1159 // for FPR registers many lines above here. 1160 1161 if( src_first != FPR1L_num ) { 1162 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1163 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1164 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1165 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1166 } else { 1167 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1168 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1169 } 1170 #ifndef PRODUCT 1171 } else if( !do_size ) { 1172 if( size != 0 ) st->print("\n\t"); 1173 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1174 else st->print( "FST %s", Matcher::regName[dst_first]); 1175 #endif 1176 } 1177 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1178 } 1179 1180 // Check for float store 1181 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1182 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1183 } 1184 1185 // Check for float load 1186 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1187 int offset = ra_->reg2offset(src_first); 1188 const char *op_str; 1189 int op; 1190 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1191 op_str = "FLD_D"; 1192 op = 0xDD; 1193 } else { // 32-bit load 1194 op_str = "FLD_S"; 1195 op = 0xD9; 1196 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1197 } 1198 if( cbuf ) { 1199 emit_opcode (*cbuf, op ); 1200 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1201 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1202 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1203 #ifndef PRODUCT 1204 } else if( !do_size ) { 1205 if( size != 0 ) st->print("\n\t"); 1206 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1207 #endif 1208 } 1209 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1210 return size + 3+offset_size+2; 1211 } 1212 1213 // Check for xmm reg-reg copy 1214 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1215 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1216 (src_first+1 == src_second && dst_first+1 == dst_second), 1217 "no non-adjacent float-moves" ); 1218 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1219 } 1220 1221 // Check for xmm reg-integer reg copy 1222 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1223 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1224 "no 64 bit float-integer reg moves" ); 1225 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1226 } 1227 1228 // Check for xmm store 1229 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1230 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1231 } 1232 1233 // Check for float xmm load 1234 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1235 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1236 } 1237 1238 // Copy from float reg to xmm reg 1239 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1240 
// copy to the top of stack from floating point reg 1241 // and use LEA to preserve flags 1242 if( cbuf ) { 1243 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1244 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1245 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1246 emit_d8(*cbuf,0xF8); 1247 #ifndef PRODUCT 1248 } else if( !do_size ) { 1249 if( size != 0 ) st->print("\n\t"); 1250 st->print("LEA ESP,[ESP-8]"); 1251 #endif 1252 } 1253 size += 4; 1254 1255 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1256 1257 // Copy from the temp memory to the xmm reg. 1258 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1259 1260 if( cbuf ) { 1261 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1262 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1263 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1264 emit_d8(*cbuf,0x08); 1265 #ifndef PRODUCT 1266 } else if( !do_size ) { 1267 if( size != 0 ) st->print("\n\t"); 1268 st->print("LEA ESP,[ESP+8]"); 1269 #endif 1270 } 1271 size += 4; 1272 return size; 1273 } 1274 1275 assert( size > 0, "missed a case" ); 1276 1277 // -------------------------------------------------------------------- 1278 // Check for second bits still needing moving. 
1279 if( src_second == dst_second ) 1280 return size; // Self copy; no move 1281 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1282 1283 // Check for second word int-int move 1284 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1285 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1286 1287 // Check for second word integer store 1288 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1289 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1290 1291 // Check for second word integer load 1292 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1293 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1294 1295 1296 Unimplemented(); 1297 return 0; // Mute compiler 1298 } 1299 1300 #ifndef PRODUCT 1301 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1302 implementation( NULL, ra_, false, st ); 1303 } 1304 #endif 1305 1306 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1307 implementation( &cbuf, ra_, false, NULL ); 1308 } 1309 1310 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1311 return implementation( NULL, ra_, true, NULL ); 1312 } 1313 1314 1315 //============================================================================= 1316 #ifndef PRODUCT 1317 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1318 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1319 int reg = ra_->get_reg_first(this); 1320 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1321 } 1322 #endif 1323 1324 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1325 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1326 int reg = ra_->get_encode(this); 1327 if( offset >= 128 ) { 1328 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1329 emit_rm(cbuf, 0x2, reg, 
0x04); 1330 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1331 emit_d32(cbuf, offset); 1332 } 1333 else { 1334 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1335 emit_rm(cbuf, 0x1, reg, 0x04); 1336 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1337 emit_d8(cbuf, offset); 1338 } 1339 } 1340 1341 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1342 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1343 if( offset >= 128 ) { 1344 return 7; 1345 } 1346 else { 1347 return 4; 1348 } 1349 } 1350 1351 //============================================================================= 1352 #ifndef PRODUCT 1353 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1354 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1355 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1356 st->print_cr("\tNOP"); 1357 st->print_cr("\tNOP"); 1358 if( !OptoBreakpoint ) 1359 st->print_cr("\tNOP"); 1360 } 1361 #endif 1362 1363 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1364 MacroAssembler masm(&cbuf); 1365 #ifdef ASSERT 1366 uint insts_size = cbuf.insts_size(); 1367 #endif 1368 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1369 masm.jump_cc(Assembler::notEqual, 1370 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1371 /* WARNING these NOPs are critical so that verified entry point is properly 1372 aligned for patching by NativeJump::patch_verified_entry() */ 1373 int nops_cnt = 2; 1374 if( !OptoBreakpoint ) // Leave space for int3 1375 nops_cnt += 1; 1376 masm.nop(nops_cnt); 1377 1378 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1379 } 1380 1381 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1382 return OptoBreakpoint ? 
11 : 12; 1383 } 1384 1385 1386 //============================================================================= 1387 1388 int Matcher::regnum_to_fpu_offset(int regnum) { 1389 return regnum - 32; // The FP registers are in the second chunk 1390 } 1391 1392 // This is UltraSparc specific, true just means we have fast l2f conversion 1393 const bool Matcher::convL2FSupported(void) { 1394 return true; 1395 } 1396 1397 // Is this branch offset short enough that a short branch can be used? 1398 // 1399 // NOTE: If the platform does not provide any short branch variants, then 1400 // this method should return false for offset 0. 1401 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1402 // The passed offset is relative to address of the branch. 1403 // On 86 a branch displacement is calculated relative to address 1404 // of a next instruction. 1405 offset -= br_size; 1406 1407 // the short version of jmpConUCF2 contains multiple branches, 1408 // making the reach slightly less 1409 if (rule == jmpConUCF2_rule) 1410 return (-126 <= offset && offset <= 125); 1411 return (-128 <= offset && offset <= 127); 1412 } 1413 1414 const bool Matcher::isSimpleConstant64(jlong value) { 1415 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1416 return false; 1417 } 1418 1419 // The ecx parameter to rep stos for the ClearArray node is in dwords. 1420 const bool Matcher::init_array_count_is_in_bytes = false; 1421 1422 // Threshold size for cleararray. 1423 const int Matcher::init_array_short_size = 8 * BytesPerLong; 1424 1425 // Needs 2 CMOV's for longs. 1426 const int Matcher::long_cmove_cost() { return 1; } 1427 1428 // No CMOVF/CMOVD with SSE/SSE2 1429 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1430 1431 // Does the CPU require late expand (see block.cpp for description of late expand)? 
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Not expected to be called on this platform (guarded by ShouldNotCallThis).
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

// Not expected to be called on this platform (guarded by ShouldNotCallThis).
bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
1464 const bool Matcher::misaligned_doubles_ok = true; 1465 1466 1467 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { 1468 // Get the memory operand from the node 1469 uint numopnds = node->num_opnds(); // Virtual call for number of operands 1470 uint skipped = node->oper_input_base(); // Sum of leaves skipped so far 1471 assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" ); 1472 uint opcnt = 1; // First operand 1473 uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand 1474 while( idx >= skipped+num_edges ) { 1475 skipped += num_edges; 1476 opcnt++; // Bump operand count 1477 assert( opcnt < numopnds, "Accessing non-existent operand" ); 1478 num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand 1479 } 1480 1481 MachOper *memory = node->_opnds[opcnt]; 1482 MachOper *new_memory = NULL; 1483 switch (memory->opcode()) { 1484 case DIRECT: 1485 case INDOFFSET32X: 1486 // No transformation necessary. 1487 return; 1488 case INDIRECT: 1489 new_memory = new indirect_win95_safeOper( ); 1490 break; 1491 case INDOFFSET8: 1492 new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0)); 1493 break; 1494 case INDOFFSET32: 1495 new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0)); 1496 break; 1497 case INDINDEXOFFSET: 1498 new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0)); 1499 break; 1500 case INDINDEXSCALE: 1501 new_memory = new indIndexScale_win95_safeOper(memory->scale()); 1502 break; 1503 case INDINDEXSCALEOFFSET: 1504 new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0)); 1505 break; 1506 case LOAD_LONG_INDIRECT: 1507 case LOAD_LONG_INDOFFSET32: 1508 // Does not use EBP as address register, use { EDX, EBX, EDI, ESI} 1509 return; 1510 default: 1511 assert(false, "unexpected memory operand in pd_implicit_null_fixup()"); 1512 return; 1513 } 1514 node->_opnds[opcnt] = new_memory; 1515 } 1516 1517 // Advertise 
here if the CPU requires explicit rounding operations 1518 // to implement the UseStrictFP mode. 1519 const bool Matcher::strict_fp_requires_explicit_rounding = true; 1520 1521 // Are floats conerted to double when stored to stack during deoptimization? 1522 // On x32 it is stored with convertion only when FPU is used for floats. 1523 bool Matcher::float_in_double() { return (UseSSE == 0); } 1524 1525 // Do ints take an entire long register or just half? 1526 const bool Matcher::int_in_long = false; 1527 1528 // Return whether or not this register is ever used as an argument. This 1529 // function is used on startup to build the trampoline stubs in generateOptoStub. 1530 // Registers not mentioned will be killed by the VM call in the trampoline, and 1531 // arguments in those registers not be available to the callee. 1532 bool Matcher::can_be_java_arg( int reg ) { 1533 if( reg == ECX_num || reg == EDX_num ) return true; 1534 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; 1535 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1536 return false; 1537 } 1538 1539 bool Matcher::is_spillable_arg( int reg ) { 1540 return can_be_java_arg(reg); 1541 } 1542 1543 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { 1544 // Use hardware integer DIV instruction when 1545 // it is faster than a code which use multiply. 1546 // Only when constant divisor fits into 32 bit 1547 // (min_jint is excluded to get only correct 1548 // positive 32 bit values from negative). 
1549 return VM_Version::has_fast_idiv() && 1550 (divisor == (int)divisor && divisor != min_jint); 1551 } 1552 1553 // Register for DIVI projection of divmodI 1554 RegMask Matcher::divI_proj_mask() { 1555 return EAX_REG_mask(); 1556 } 1557 1558 // Register for MODI projection of divmodI 1559 RegMask Matcher::modI_proj_mask() { 1560 return EDX_REG_mask(); 1561 } 1562 1563 // Register for DIVL projection of divmodL 1564 RegMask Matcher::divL_proj_mask() { 1565 ShouldNotReachHere(); 1566 return RegMask(); 1567 } 1568 1569 // Register for MODL projection of divmodL 1570 RegMask Matcher::modL_proj_mask() { 1571 ShouldNotReachHere(); 1572 return RegMask(); 1573 } 1574 1575 const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1576 return NO_REG_mask(); 1577 } 1578 1579 // Returns true if the high 32 bits of the value is known to be zero. 1580 bool is_operand_hi32_zero(Node* n) { 1581 int opc = n->Opcode(); 1582 if (opc == Op_AndL) { 1583 Node* o2 = n->in(2); 1584 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1585 return true; 1586 } 1587 } 1588 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1589 return true; 1590 } 1591 return false; 1592 } 1593 1594 %} 1595 1596 //----------ENCODING BLOCK----------------------------------------------------- 1597 // This block specifies the encoding classes used by the compiler to output 1598 // byte streams. Encoding classes generate functions which are called by 1599 // Machine Instruction Nodes in order to generate the bit encoding of the 1600 // instruction. Operands specify their base encoding interface with the 1601 // interface keyword. There are currently supported four interfaces, 1602 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1603 // operand to generate a function which returns its register number when 1604 // queried. CONST_INTER causes an operand to generate a function which 1605 // returns the value of the constant when queried. 
MEMORY_INTER causes an 1606 // operand to generate four functions which return the Base Register, the 1607 // Index Register, the Scale Value, and the Offset Value of the operand when 1608 // queried. COND_INTER causes an operand to generate six functions which 1609 // return the encoding code (ie - encoding bits for the instruction) 1610 // associated with each basic boolean condition for a conditional instruction. 1611 // Instructions specify two basic values for encoding. They use the 1612 // ins_encode keyword to specify their encoding class (which must be one of 1613 // the class names specified in the encoding block), and they use the 1614 // opcode keyword to specify, in order, their primary, secondary, and 1615 // tertiary opcode. Only the opcode sections which a particular instruction 1616 // needs for encoding need to be specified. 1617 encode %{ 1618 // Build emit functions for each basic byte or larger field in the intel 1619 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1620 // code in the enc_class source block. Emit functions will live in the 1621 // main source block for now. 
In future, we can generalize this by 1622 // adding a syntax that specifies the sizes of fields in an order, 1623 // so that the adlc can build the emit functions automagically 1624 1625 // Emit primary opcode 1626 enc_class OpcP %{ 1627 emit_opcode(cbuf, $primary); 1628 %} 1629 1630 // Emit secondary opcode 1631 enc_class OpcS %{ 1632 emit_opcode(cbuf, $secondary); 1633 %} 1634 1635 // Emit opcode directly 1636 enc_class Opcode(immI d8) %{ 1637 emit_opcode(cbuf, $d8$$constant); 1638 %} 1639 1640 enc_class SizePrefix %{ 1641 emit_opcode(cbuf,0x66); 1642 %} 1643 1644 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1645 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1646 %} 1647 1648 enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many) 1649 emit_opcode(cbuf,$opcode$$constant); 1650 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1651 %} 1652 1653 enc_class mov_r32_imm0( rRegI dst ) %{ 1654 emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 1655 emit_d32 ( cbuf, 0x0 ); // imm32==0x0 1656 %} 1657 1658 enc_class cdq_enc %{ 1659 // Full implementation of Java idiv and irem; checks for 1660 // special case as described in JVM spec., p.243 & p.271. 
    //
    //         normal case                            special case
    //
    // input : rax,: dividend                         min_int
    //         reg:  divisor                          -1
    //
    // output: rax,: quotient  (= rax, idiv reg)      min_int
    //         rdx:  remainder (= rax, irem reg)      0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80    cmp         rax,80000000h
    // 0F 85 0B 00 00 00    jne         normal_case
    // 33 D2                xor         rdx,edx
    // 83 F9 FF             cmp         rcx,0FFh
    // 0F 84 03 00 00 00    je          done
    //                  normal_case:
    // 99                   cdq
    // F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops: opcode byte has the register
  // encoding folded into its low three bits.
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and
    // set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low word of a long immediate op: emits opcode, r/m, and the 8- or
  // 32-bit immediate in one go.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // BSWAP both halves of a long, then exchange them so the byte order of
  // the full 64-bit value is reversed.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    // Record the emitted size once; all subsequent uses must match it.
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                   runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // Move the x87 float result into xmm0 via a stack temporary.
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        // Move the x87 double result into xmm0 via a stack temporary.
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}


  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if (ra_->C->in_24_bit_fp_mode()) {
      MacroAssembler _masm(&cbuf);
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    if (ra_->C->max_vector_size() > 16) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      MacroAssembler _masm(&cbuf);
      __ vzeroupper();
    }
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;

    if (!_method) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_IMM32);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ?
          opt_virtual_call_Relocation::spec(method_index)
        : static_call_Relocation::spec(method_index);
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     rspec, RELOC_DISP32);
      // Emit stubs for static call.
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        // Stub allocation failed; abandon this compile.
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);                        // Displacement

  %}

  // Following encoding is no longer used, but may be restored if calling
  // convention changes significantly.
  // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  // enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //   // int ic_reg     = Matcher::inline_cache_reg();
  //   // int ic_encode  = Matcher::_regEncode[ic_reg];
  //   // int imo_reg    = Matcher::interpreter_method_oop_reg();
  //   // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //   // // Interpreter expects method_oop in EBX, currently a callee-saved register,
  //   // // so we load it immediately before the call
  //   // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
  //   // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
  //
  //   // xor rbp,ebp
  //   emit_opcode(cbuf, 0x33);
  //   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //   // CALL to interpreter.
  //   cbuf.set_insts_mark();
  //   $$$emit8$primary;
  //   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //                  runtime_call_Relocation::spec(), RELOC_IMM32 );
  // %}

  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src)
  %{    // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{    // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // Emit a LOCK prefix, but only on multiprocessor systems.
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx  (restore original register contents)
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize the Z flag (from a preceding compare) as a 0/1 boolean.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding,
                  base, index, scale, displace, relocInfo::none);
  %}

  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    // $tertiary selects SHLD (0xA4) vs SHRD: it fixes which half is the
    // destination of the double-shift.
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();       // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();       // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free conditional add: p += (p < q) ? y : 0, using SBB to
  // materialize an all-ones/all-zeros mask in tmp.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!!  equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Multiply TOS by a bias constant to force subnormals into range
  // (strictfp support).
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;                      // plain FST when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: pop the extra copy we pushed
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to
  // a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Spill two XMM doubles through a stack temp and push both on the x87
  // stack (src1 first, so src0 ends up at TOS).
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 TOS double into an XMM register via the stack temp.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Reserve an 8-byte stack temporary.
  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  // Release the 8-byte stack temporary.
  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Copy an XMM double into the existing stack temp and push it on the
  // x87 stack.
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf,
0x66 ); // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16 ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32 ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8 ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8 ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  // P6-family fixup after an FP compare that set the integer flags:
  // when the compare was unordered (parity set), rewrite EFLAGS so the
  // result reads as "less than".
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8 ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8 ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  // Pseudo-code for CmpF_Result below:
  // fnstsw_ax();
  // sahf();
  // movl(dst, nan_result);
  // jcc(Assembler::parity, exit);
  // movl(dst, less_result);
  // jcc(Assembler::below, exit);
  // movl(dst, equal_result);
  // jcc(Assembler::equal, exit);
  // movl(dst, greater_result);

  // less_result = 1;
  // greater_result = -1;
  // equal_result = 0;
  // nan_result = -1;
  //
  // NOTE(review): the bytes emitted below actually load nan=-1, less=-1,
  // equal=0, greater=1 into dst, which does not match the list above --
  // confirm which is intended before relying on the list.

  // Materialize a three-way FP compare result in dst.
  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8 ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
emit_d8 ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8 ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // Compare the longs and set flags
  // BROKEN! Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend a 32-bit int into a long register pair: copy src into
  // both halves of dst, then arithmetic-shift dst.hi right by 31.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  // Push a long register pair, FILD the 64-bit value onto the x87
  // stack, then pop the two pushed words (ADD ESP,8).
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  // Signed widening multiply into EDX:EAX, then shift the high half
  // right by (cnt-32) so dst (EDX) holds the product shifted by cnt.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  // Full 64x64->64 multiply using the schoolbook decomposition below;
  // dst is the EDX:EAX pair, tmp accumulates the high-word terms.
  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD EDX,ESI
    emit_opcode(
cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}

  // Multiply a long by a small (0..127) constant; dst is EDX:EAX.
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    // hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}

  // Long division: push both register pairs as arguments, call
  // SharedRuntime::ldiv, then pop the four argument words.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf, 0x50+$src1$$reg );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf, 0x50+$src2$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Long remainder: identical calling sequence to long_div above but
  // targets SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf, 0x50+$src1$$reg );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf, 0x50+$src2$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Flags for (long == 0): OR the two halves together into tmp.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  // Long equality compare: compare the low words, and only if they are
  // equal fall through to compare the high words.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}

  // Signed long compare via CMP on the low halves and subtract-with-
  // borrow (SBB) of the high halves into tmp.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}

  // Compare zero against a long (0 - src) using CMP/SBB on the halves.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}

  // Sniff, sniff... smells like Gnu Superoptimizer
  // Negate a long in place: NEG hi, NEG lo, then SBB 0 into hi to
  // account for the borrow out of the low word.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3, $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  // POP EDX
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}

  // Unconditional jump to the rethrow stub.
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);  // jmp entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);  // FLDCW trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);  // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
// Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);  // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);  // FLDCW std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    // 0x80000000 is the "invalid value Intel stores down in the corner
    // cases" (see note above); only then take the slow runtime call.
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); // 0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Convert a double to a long; same round-to-zero trick as
  // DPR2I_encoding above but with a 64-bit FISTP and a two-word
  // result popped into EDX:EAX.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);  // FLDCW trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);  // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);  // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);  // FLDCW std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x5A);       // POP EDX
    // Slow path only when the result is exactly 0x8000000000000000
    // (hi == 0x80000000 and lo == 0) -- the corner-case marker value.
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); // 0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);     // Size of slow_call
    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
%}

  // FMUL ST,ST(src1): multiply the stack top by a stack register.
  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL ST,$src /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD ST,src2 /* D8 C0+i */
    // (non-popping add into the stack top; an earlier comment here said
    // FADDP, but D8 C0+i encodes the non-popping FADD)
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP src2,fpST /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP src2,ST /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  // FSUB then FDIV against stack registers, both operating on the
  // stack top.
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  // (ST + src1) * src2, result left in the stack top.
  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD ST,$src /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL ST,src2 /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Like MulFAddF but the multiply pops, storing the product into src2.
  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD ST,$src /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP src2,ST /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long: FILD the 64-bit memory operand
  // (DF /5), then store it to the destination stack slot.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();  // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable. Unfortunately, it kills the condition code
  // in the process
  // We current use TESTL [spp],EDI
  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
  enc_class Safepoint_Poll() %{
    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm (cbuf, 0x0, 0x7, 0x5);
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  %}
%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   | (to get allocators register number
//  G  Owned by |        |           v    add OptoReg::stack0())
//  r   CALLER  |        |
//  o     |     +--------+      pad to even-align allocators stack-slot
//  w     V     |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^     |   in   |  5
//        |     |  args  |  4   Holes in incoming args owned by SELF
//        |     |        |  3
//        |     +--------+
//  V     |     | old out|      Empty on Intel, window on Sparc
//        |  old |preserve|     Must be even aligned.
//        |  SP-+--------+----> Matcher::_old_SP, even aligned
//        |     |   in   |  3   area for Intel ret address
//     Owned by |preserve|      Empty on Sparc.
//       SELF   +--------+
//        |     |  pad2  |  2   pad to align old SP
//        |     +--------+  1
//        |     | locks  |  0
//        |     +--------+----> OptoReg::stack0(), even aligned
//        |     |  pad1  | 11   pad to align new SP
//        |     +--------+
//        |     |        | 10
//        |     | spills |  9   spills
//        V     |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^     |  out   |  7
//        |     |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by +--------+
//      CALLEE  | new out|  6   Empty on Intel, window on Sparc
//        |  new |preserve|     Must be even-aligned.
//        |  SP-+--------+----> Matcher::_new_SP, even aligned
//        |     |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be nessecary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be nessecary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // NOTE(review): unlike c_return_value above, a float result goes to XMM0
  // whenever UseSSE>=1 (not >=2); this asymmetry is in the original source.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// 8-bit signed integer immediate
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// 16-bit signed integer immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in the range 1..31
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in the range 32..63
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate -1
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value fits in signed 32 bits
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 and above)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE1 and above)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand
eCXRegI(xRegI reg) %{ 3726 constraint(ALLOC_IN_RC(ecx_reg)); 3727 match(reg); 3728 match(rRegI); 3729 3730 format %{ "ECX" %} 3731 interface(REG_INTER); 3732 %} 3733 3734 operand eDXRegI(xRegI reg) %{ 3735 constraint(ALLOC_IN_RC(edx_reg)); 3736 match(reg); 3737 match(rRegI); 3738 3739 format %{ "EDX" %} 3740 interface(REG_INTER); 3741 %} 3742 3743 operand eDIRegI(xRegI reg) %{ 3744 constraint(ALLOC_IN_RC(edi_reg)); 3745 match(reg); 3746 match(rRegI); 3747 3748 format %{ "EDI" %} 3749 interface(REG_INTER); 3750 %} 3751 3752 operand naxRegI() %{ 3753 constraint(ALLOC_IN_RC(nax_reg)); 3754 match(RegI); 3755 match(eCXRegI); 3756 match(eDXRegI); 3757 match(eSIRegI); 3758 match(eDIRegI); 3759 3760 format %{ %} 3761 interface(REG_INTER); 3762 %} 3763 3764 operand nadxRegI() %{ 3765 constraint(ALLOC_IN_RC(nadx_reg)); 3766 match(RegI); 3767 match(eBXRegI); 3768 match(eCXRegI); 3769 match(eSIRegI); 3770 match(eDIRegI); 3771 3772 format %{ %} 3773 interface(REG_INTER); 3774 %} 3775 3776 operand ncxRegI() %{ 3777 constraint(ALLOC_IN_RC(ncx_reg)); 3778 match(RegI); 3779 match(eAXRegI); 3780 match(eDXRegI); 3781 match(eSIRegI); 3782 match(eDIRegI); 3783 3784 format %{ %} 3785 interface(REG_INTER); 3786 %} 3787 3788 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3789 // // 3790 operand eSIRegI(xRegI reg) %{ 3791 constraint(ALLOC_IN_RC(esi_reg)); 3792 match(reg); 3793 match(rRegI); 3794 3795 format %{ "ESI" %} 3796 interface(REG_INTER); 3797 %} 3798 3799 // Pointer Register 3800 operand anyRegP() %{ 3801 constraint(ALLOC_IN_RC(any_reg)); 3802 match(RegP); 3803 match(eAXRegP); 3804 match(eBXRegP); 3805 match(eCXRegP); 3806 match(eDIRegP); 3807 match(eRegP); 3808 3809 format %{ %} 3810 interface(REG_INTER); 3811 %} 3812 3813 operand eRegP() %{ 3814 constraint(ALLOC_IN_RC(int_reg)); 3815 match(RegP); 3816 match(eAXRegP); 3817 match(eBXRegP); 3818 match(eCXRegP); 3819 match(eDIRegP); 3820 3821 format %{ %} 3822 interface(REG_INTER); 3823 %} 3824 3825 // 
On windows95, EBP is not safe to use for implicit null tests. 3826 operand eRegP_no_EBP() %{ 3827 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3828 match(RegP); 3829 match(eAXRegP); 3830 match(eBXRegP); 3831 match(eCXRegP); 3832 match(eDIRegP); 3833 3834 op_cost(100); 3835 format %{ %} 3836 interface(REG_INTER); 3837 %} 3838 3839 operand naxRegP() %{ 3840 constraint(ALLOC_IN_RC(nax_reg)); 3841 match(RegP); 3842 match(eBXRegP); 3843 match(eDXRegP); 3844 match(eCXRegP); 3845 match(eSIRegP); 3846 match(eDIRegP); 3847 3848 format %{ %} 3849 interface(REG_INTER); 3850 %} 3851 3852 operand nabxRegP() %{ 3853 constraint(ALLOC_IN_RC(nabx_reg)); 3854 match(RegP); 3855 match(eCXRegP); 3856 match(eDXRegP); 3857 match(eSIRegP); 3858 match(eDIRegP); 3859 3860 format %{ %} 3861 interface(REG_INTER); 3862 %} 3863 3864 operand pRegP() %{ 3865 constraint(ALLOC_IN_RC(p_reg)); 3866 match(RegP); 3867 match(eBXRegP); 3868 match(eDXRegP); 3869 match(eSIRegP); 3870 match(eDIRegP); 3871 3872 format %{ %} 3873 interface(REG_INTER); 3874 %} 3875 3876 // Special Registers 3877 // Return a pointer value 3878 operand eAXRegP(eRegP reg) %{ 3879 constraint(ALLOC_IN_RC(eax_reg)); 3880 match(reg); 3881 format %{ "EAX" %} 3882 interface(REG_INTER); 3883 %} 3884 3885 // Used in AtomicAdd 3886 operand eBXRegP(eRegP reg) %{ 3887 constraint(ALLOC_IN_RC(ebx_reg)); 3888 match(reg); 3889 format %{ "EBX" %} 3890 interface(REG_INTER); 3891 %} 3892 3893 // Tail-call (interprocedural jump) to interpreter 3894 operand eCXRegP(eRegP reg) %{ 3895 constraint(ALLOC_IN_RC(ecx_reg)); 3896 match(reg); 3897 format %{ "ECX" %} 3898 interface(REG_INTER); 3899 %} 3900 3901 operand eSIRegP(eRegP reg) %{ 3902 constraint(ALLOC_IN_RC(esi_reg)); 3903 match(reg); 3904 format %{ "ESI" %} 3905 interface(REG_INTER); 3906 %} 3907 3908 // Used in rep stosw 3909 operand eDIRegP(eRegP reg) %{ 3910 constraint(ALLOC_IN_RC(edi_reg)); 3911 match(reg); 3912 format %{ "EDI" %} 3913 interface(REG_INTER); 3914 %} 3915 3916 operand eRegL() %{ 
3917 constraint(ALLOC_IN_RC(long_reg)); 3918 match(RegL); 3919 match(eADXRegL); 3920 3921 format %{ %} 3922 interface(REG_INTER); 3923 %} 3924 3925 operand eADXRegL( eRegL reg ) %{ 3926 constraint(ALLOC_IN_RC(eadx_reg)); 3927 match(reg); 3928 3929 format %{ "EDX:EAX" %} 3930 interface(REG_INTER); 3931 %} 3932 3933 operand eBCXRegL( eRegL reg ) %{ 3934 constraint(ALLOC_IN_RC(ebcx_reg)); 3935 match(reg); 3936 3937 format %{ "EBX:ECX" %} 3938 interface(REG_INTER); 3939 %} 3940 3941 // Special case for integer high multiply 3942 operand eADXRegL_low_only() %{ 3943 constraint(ALLOC_IN_RC(eadx_reg)); 3944 match(RegL); 3945 3946 format %{ "EAX" %} 3947 interface(REG_INTER); 3948 %} 3949 3950 // Flags register, used as output of compare instructions 3951 operand eFlagsReg() %{ 3952 constraint(ALLOC_IN_RC(int_flags)); 3953 match(RegFlags); 3954 3955 format %{ "EFLAGS" %} 3956 interface(REG_INTER); 3957 %} 3958 3959 // Flags register, used as output of FLOATING POINT compare instructions 3960 operand eFlagsRegU() %{ 3961 constraint(ALLOC_IN_RC(int_flags)); 3962 match(RegFlags); 3963 3964 format %{ "EFLAGS_U" %} 3965 interface(REG_INTER); 3966 %} 3967 3968 operand eFlagsRegUCF() %{ 3969 constraint(ALLOC_IN_RC(int_flags)); 3970 match(RegFlags); 3971 predicate(false); 3972 3973 format %{ "EFLAGS_U_CF" %} 3974 interface(REG_INTER); 3975 %} 3976 3977 // Condition Code Register used by long compare 3978 operand flagsReg_long_LTGE() %{ 3979 constraint(ALLOC_IN_RC(int_flags)); 3980 match(RegFlags); 3981 format %{ "FLAGS_LTGE" %} 3982 interface(REG_INTER); 3983 %} 3984 operand flagsReg_long_EQNE() %{ 3985 constraint(ALLOC_IN_RC(int_flags)); 3986 match(RegFlags); 3987 format %{ "FLAGS_EQNE" %} 3988 interface(REG_INTER); 3989 %} 3990 operand flagsReg_long_LEGT() %{ 3991 constraint(ALLOC_IN_RC(int_flags)); 3992 match(RegFlags); 3993 format %{ "FLAGS_LEGT" %} 3994 interface(REG_INTER); 3995 %} 3996 3997 // Float register operands 3998 operand regDPR() %{ 3999 predicate( UseSSE < 2 ); 
4000 constraint(ALLOC_IN_RC(fp_dbl_reg)); 4001 match(RegD); 4002 match(regDPR1); 4003 match(regDPR2); 4004 format %{ %} 4005 interface(REG_INTER); 4006 %} 4007 4008 operand regDPR1(regDPR reg) %{ 4009 predicate( UseSSE < 2 ); 4010 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4011 match(reg); 4012 format %{ "FPR1" %} 4013 interface(REG_INTER); 4014 %} 4015 4016 operand regDPR2(regDPR reg) %{ 4017 predicate( UseSSE < 2 ); 4018 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4019 match(reg); 4020 format %{ "FPR2" %} 4021 interface(REG_INTER); 4022 %} 4023 4024 operand regnotDPR1(regDPR reg) %{ 4025 predicate( UseSSE < 2 ); 4026 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4027 match(reg); 4028 format %{ %} 4029 interface(REG_INTER); 4030 %} 4031 4032 // Float register operands 4033 operand regFPR() %{ 4034 predicate( UseSSE < 2 ); 4035 constraint(ALLOC_IN_RC(fp_flt_reg)); 4036 match(RegF); 4037 match(regFPR1); 4038 format %{ %} 4039 interface(REG_INTER); 4040 %} 4041 4042 // Float register operands 4043 operand regFPR1(regFPR reg) %{ 4044 predicate( UseSSE < 2 ); 4045 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4046 match(reg); 4047 format %{ "FPR1" %} 4048 interface(REG_INTER); 4049 %} 4050 4051 // XMM Float register operands 4052 operand regF() %{ 4053 predicate( UseSSE>=1 ); 4054 constraint(ALLOC_IN_RC(float_reg_legacy)); 4055 match(RegF); 4056 format %{ %} 4057 interface(REG_INTER); 4058 %} 4059 4060 // XMM Double register operands 4061 operand regD() %{ 4062 predicate( UseSSE>=2 ); 4063 constraint(ALLOC_IN_RC(double_reg_legacy)); 4064 match(RegD); 4065 format %{ %} 4066 interface(REG_INTER); 4067 %} 4068 4069 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM) 4070 // runtime code generation via reg_class_dynamic. 
4071 operand vecS() %{ 4072 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 4073 match(VecS); 4074 4075 format %{ %} 4076 interface(REG_INTER); 4077 %} 4078 4079 operand vecD() %{ 4080 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 4081 match(VecD); 4082 4083 format %{ %} 4084 interface(REG_INTER); 4085 %} 4086 4087 operand vecX() %{ 4088 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 4089 match(VecX); 4090 4091 format %{ %} 4092 interface(REG_INTER); 4093 %} 4094 4095 operand vecY() %{ 4096 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 4097 match(VecY); 4098 4099 format %{ %} 4100 interface(REG_INTER); 4101 %} 4102 4103 //----------Memory Operands---------------------------------------------------- 4104 // Direct Memory Operand 4105 operand direct(immP addr) %{ 4106 match(addr); 4107 4108 format %{ "[$addr]" %} 4109 interface(MEMORY_INTER) %{ 4110 base(0xFFFFFFFF); 4111 index(0x4); 4112 scale(0x0); 4113 disp($addr); 4114 %} 4115 %} 4116 4117 // Indirect Memory Operand 4118 operand indirect(eRegP reg) %{ 4119 constraint(ALLOC_IN_RC(int_reg)); 4120 match(reg); 4121 4122 format %{ "[$reg]" %} 4123 interface(MEMORY_INTER) %{ 4124 base($reg); 4125 index(0x4); 4126 scale(0x0); 4127 disp(0x0); 4128 %} 4129 %} 4130 4131 // Indirect Memory Plus Short Offset Operand 4132 operand indOffset8(eRegP reg, immI8 off) %{ 4133 match(AddP reg off); 4134 4135 format %{ "[$reg + $off]" %} 4136 interface(MEMORY_INTER) %{ 4137 base($reg); 4138 index(0x4); 4139 scale(0x0); 4140 disp($off); 4141 %} 4142 %} 4143 4144 // Indirect Memory Plus Long Offset Operand 4145 operand indOffset32(eRegP reg, immI off) %{ 4146 match(AddP reg off); 4147 4148 format %{ "[$reg + $off]" %} 4149 interface(MEMORY_INTER) %{ 4150 base($reg); 4151 index(0x4); 4152 scale(0x0); 4153 disp($off); 4154 %} 4155 %} 4156 4157 // Indirect Memory Plus Long Offset Operand 4158 operand indOffset32X(rRegI reg, immP off) %{ 4159 match(AddP off reg); 4160 4161 format %{ "[$reg + $off]" %} 4162 interface(MEMORY_INTER) %{ 4163 
base($reg); 4164 index(0x4); 4165 scale(0x0); 4166 disp($off); 4167 %} 4168 %} 4169 4170 // Indirect Memory Plus Index Register Plus Offset Operand 4171 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4172 match(AddP (AddP reg ireg) off); 4173 4174 op_cost(10); 4175 format %{"[$reg + $off + $ireg]" %} 4176 interface(MEMORY_INTER) %{ 4177 base($reg); 4178 index($ireg); 4179 scale(0x0); 4180 disp($off); 4181 %} 4182 %} 4183 4184 // Indirect Memory Plus Index Register Plus Offset Operand 4185 operand indIndex(eRegP reg, rRegI ireg) %{ 4186 match(AddP reg ireg); 4187 4188 op_cost(10); 4189 format %{"[$reg + $ireg]" %} 4190 interface(MEMORY_INTER) %{ 4191 base($reg); 4192 index($ireg); 4193 scale(0x0); 4194 disp(0x0); 4195 %} 4196 %} 4197 4198 // // ------------------------------------------------------------------------- 4199 // // 486 architecture doesn't support "scale * index + offset" with out a base 4200 // // ------------------------------------------------------------------------- 4201 // // Scaled Memory Operands 4202 // // Indirect Memory Times Scale Plus Offset Operand 4203 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4204 // match(AddP off (LShiftI ireg scale)); 4205 // 4206 // op_cost(10); 4207 // format %{"[$off + $ireg << $scale]" %} 4208 // interface(MEMORY_INTER) %{ 4209 // base(0x4); 4210 // index($ireg); 4211 // scale($scale); 4212 // disp($off); 4213 // %} 4214 // %} 4215 4216 // Indirect Memory Times Scale Plus Index Register 4217 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4218 match(AddP reg (LShiftI ireg scale)); 4219 4220 op_cost(10); 4221 format %{"[$reg + $ireg << $scale]" %} 4222 interface(MEMORY_INTER) %{ 4223 base($reg); 4224 index($ireg); 4225 scale($scale); 4226 disp(0x0); 4227 %} 4228 %} 4229 4230 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4231 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4232 match(AddP (AddP reg (LShiftI ireg 
scale)) off); 4233 4234 op_cost(10); 4235 format %{"[$reg + $off + $ireg << $scale]" %} 4236 interface(MEMORY_INTER) %{ 4237 base($reg); 4238 index($ireg); 4239 scale($scale); 4240 disp($off); 4241 %} 4242 %} 4243 4244 //----------Load Long Memory Operands------------------------------------------ 4245 // The load-long idiom will use it's address expression again after loading 4246 // the first word of the long. If the load-long destination overlaps with 4247 // registers used in the addressing expression, the 2nd half will be loaded 4248 // from a clobbered address. Fix this by requiring that load-long use 4249 // address registers that do not overlap with the load-long target. 4250 4251 // load-long support 4252 operand load_long_RegP() %{ 4253 constraint(ALLOC_IN_RC(esi_reg)); 4254 match(RegP); 4255 match(eSIRegP); 4256 op_cost(100); 4257 format %{ %} 4258 interface(REG_INTER); 4259 %} 4260 4261 // Indirect Memory Operand Long 4262 operand load_long_indirect(load_long_RegP reg) %{ 4263 constraint(ALLOC_IN_RC(esi_reg)); 4264 match(reg); 4265 4266 format %{ "[$reg]" %} 4267 interface(MEMORY_INTER) %{ 4268 base($reg); 4269 index(0x4); 4270 scale(0x0); 4271 disp(0x0); 4272 %} 4273 %} 4274 4275 // Indirect Memory Plus Long Offset Operand 4276 operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4277 match(AddP reg off); 4278 4279 format %{ "[$reg + $off]" %} 4280 interface(MEMORY_INTER) %{ 4281 base($reg); 4282 index(0x4); 4283 scale(0x0); 4284 disp($off); 4285 %} 4286 %} 4287 4288 opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4289 4290 4291 //----------Special Memory Operands-------------------------------------------- 4292 // Stack Slot Operand - This operand is used for loading and storing temporary 4293 // values on the stack where a match requires a value to 4294 // flow through memory. 
4295 operand stackSlotP(sRegP reg) %{ 4296 constraint(ALLOC_IN_RC(stack_slots)); 4297 // No match rule because this operand is only generated in matching 4298 format %{ "[$reg]" %} 4299 interface(MEMORY_INTER) %{ 4300 base(0x4); // ESP 4301 index(0x4); // No Index 4302 scale(0x0); // No Scale 4303 disp($reg); // Stack Offset 4304 %} 4305 %} 4306 4307 operand stackSlotI(sRegI reg) %{ 4308 constraint(ALLOC_IN_RC(stack_slots)); 4309 // No match rule because this operand is only generated in matching 4310 format %{ "[$reg]" %} 4311 interface(MEMORY_INTER) %{ 4312 base(0x4); // ESP 4313 index(0x4); // No Index 4314 scale(0x0); // No Scale 4315 disp($reg); // Stack Offset 4316 %} 4317 %} 4318 4319 operand stackSlotF(sRegF reg) %{ 4320 constraint(ALLOC_IN_RC(stack_slots)); 4321 // No match rule because this operand is only generated in matching 4322 format %{ "[$reg]" %} 4323 interface(MEMORY_INTER) %{ 4324 base(0x4); // ESP 4325 index(0x4); // No Index 4326 scale(0x0); // No Scale 4327 disp($reg); // Stack Offset 4328 %} 4329 %} 4330 4331 operand stackSlotD(sRegD reg) %{ 4332 constraint(ALLOC_IN_RC(stack_slots)); 4333 // No match rule because this operand is only generated in matching 4334 format %{ "[$reg]" %} 4335 interface(MEMORY_INTER) %{ 4336 base(0x4); // ESP 4337 index(0x4); // No Index 4338 scale(0x0); // No Scale 4339 disp($reg); // Stack Offset 4340 %} 4341 %} 4342 4343 operand stackSlotL(sRegL reg) %{ 4344 constraint(ALLOC_IN_RC(stack_slots)); 4345 // No match rule because this operand is only generated in matching 4346 format %{ "[$reg]" %} 4347 interface(MEMORY_INTER) %{ 4348 base(0x4); // ESP 4349 index(0x4); // No Index 4350 scale(0x0); // No Scale 4351 disp($reg); // Stack Offset 4352 %} 4353 %} 4354 4355 //----------Memory Operands - Win95 Implicit Null Variants---------------- 4356 // Indirect Memory Operand 4357 operand indirect_win95_safe(eRegP_no_EBP reg) 4358 %{ 4359 constraint(ALLOC_IN_RC(int_reg)); 4360 match(reg); 4361 4362 op_cost(100); 4363 
format %{ "[$reg]" %} 4364 interface(MEMORY_INTER) %{ 4365 base($reg); 4366 index(0x4); 4367 scale(0x0); 4368 disp(0x0); 4369 %} 4370 %} 4371 4372 // Indirect Memory Plus Short Offset Operand 4373 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off) 4374 %{ 4375 match(AddP reg off); 4376 4377 op_cost(100); 4378 format %{ "[$reg + $off]" %} 4379 interface(MEMORY_INTER) %{ 4380 base($reg); 4381 index(0x4); 4382 scale(0x0); 4383 disp($off); 4384 %} 4385 %} 4386 4387 // Indirect Memory Plus Long Offset Operand 4388 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off) 4389 %{ 4390 match(AddP reg off); 4391 4392 op_cost(100); 4393 format %{ "[$reg + $off]" %} 4394 interface(MEMORY_INTER) %{ 4395 base($reg); 4396 index(0x4); 4397 scale(0x0); 4398 disp($off); 4399 %} 4400 %} 4401 4402 // Indirect Memory Plus Index Register Plus Offset Operand 4403 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off) 4404 %{ 4405 match(AddP (AddP reg ireg) off); 4406 4407 op_cost(100); 4408 format %{"[$reg + $off + $ireg]" %} 4409 interface(MEMORY_INTER) %{ 4410 base($reg); 4411 index($ireg); 4412 scale(0x0); 4413 disp($off); 4414 %} 4415 %} 4416 4417 // Indirect Memory Times Scale Plus Index Register 4418 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale) 4419 %{ 4420 match(AddP reg (LShiftI ireg scale)); 4421 4422 op_cost(100); 4423 format %{"[$reg + $ireg << $scale]" %} 4424 interface(MEMORY_INTER) %{ 4425 base($reg); 4426 index($ireg); 4427 scale($scale); 4428 disp(0x0); 4429 %} 4430 %} 4431 4432 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4433 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale) 4434 %{ 4435 match(AddP (AddP reg (LShiftI ireg scale)) off); 4436 4437 op_cost(100); 4438 format %{"[$reg + $off + $ireg << $scale]" %} 4439 interface(MEMORY_INTER) %{ 4440 base($reg); 4441 index($ireg); 4442 scale($scale); 4443 disp($off); 4444 %} 4445 %} 4446 4447 
//----------Conditional Branch Operands----------------------------------------
// Comparison Op - This is the operation of the comparison, and is limited to
// the following set of codes:
// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed compares).  The encodings are the x86 Jcc
// condition-code ("tttn") fields placed directly into the opcode.
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case.
// Restricted by the predicate to the strict ordering tests (lt/ge/le/gt).
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons (eq/ne only, per the predicate) that can be
// fixed up with extra conditional jumps for the unordered case.
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move (FCMOVcc encodings).
// Overflow tests are excluded by the predicate because the instruction
// cannot express them.
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares.  The condition codes are
// deliberately reversed (less <-> greater, etc.) because the operands
// of the underlying compare have been commuted.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions (x86 is CISC)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation
// NOTE(review): src is a memory operand despite the reg-reg name — confirm
// against the instructions that cite this pipe class.
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory (immediate source)
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation (mul/div-class ops: only ALU0 handles them)
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation writing the flags register
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation writing the flags register
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation writing the flags register
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg (CmpLTMask-style 4-instruction idiom)
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder, 4 slots
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
// NOTE(review): declared single_instruction yet reserves 2 decode slots —
// verify against the cmov-long instructions that use this class.
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg operation
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-mem-reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2); 4937 src : S5(read); 4938 mem : S3(read); 4939 DECODE : S0; // any decoder for FPU PUSH 4940 D0 : S1; // big decoder only 4941 FPU : S4; 4942 MEM : S3; // any mem 4943 %} 4944 4945 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4946 instruction_count(3); 4947 src1 : S3(read); 4948 src2 : S3(read); 4949 mem : S3(read); 4950 DECODE : S0(2); // any decoder for FPU PUSH 4951 D0 : S1; // big decoder only 4952 FPU : S4; 4953 MEM : S3; // any mem 4954 %} 4955 4956 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4957 instruction_count(3); 4958 src1 : S3(read); 4959 src2 : S3(read); 4960 mem : S4(read); 4961 DECODE : S0; // any decoder for FPU PUSH 4962 D0 : S0(2); // big decoder only 4963 FPU : S4; 4964 MEM : S3(2); // any mem 4965 %} 4966 4967 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4968 instruction_count(2); 4969 src1 : S3(read); 4970 dst : S4(read); 4971 D0 : S0(2); // big decoder only 4972 MEM : S3(2); // any mem 4973 %} 4974 4975 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4976 instruction_count(3); 4977 src1 : S3(read); 4978 src2 : S3(read); 4979 dst : S4(read); 4980 D0 : S0(3); // big decoder only 4981 FPU : S4; 4982 MEM : S3(3); // any mem 4983 %} 4984 4985 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4986 instruction_count(3); 4987 src1 : S4(read); 4988 mem : S4(read); 4989 DECODE : S0; // any decoder for FPU PUSH 4990 D0 : S0(2); // big decoder only 4991 FPU : S4; 4992 MEM : S3(2); // any mem 4993 %} 4994 4995 // Float load constant 4996 pipe_class fpu_reg_con(regDPR dst) %{ 4997 instruction_count(2); 4998 dst : S5(write); 4999 D0 : S0; // big decoder only for the load 5000 DECODE : S1; // any decoder for FPU POP 5001 FPU : S4; 5002 MEM : S3; // any mem 5003 %} 5004 5005 // Float load constant 5006 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5007 instruction_count(3); 5008 dst : S5(write); 5009 src : S3(read); 5010 D0 : S0; // big decoder only for 
the load 5011 DECODE : S1(2); // any decoder for FPU POP 5012 FPU : S4; 5013 MEM : S3; // any mem 5014 %} 5015 5016 // UnConditional branch 5017 pipe_class pipe_jmp( label labl ) %{ 5018 single_instruction; 5019 BR : S3; 5020 %} 5021 5022 // Conditional branch 5023 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5024 single_instruction; 5025 cr : S1(read); 5026 BR : S3; 5027 %} 5028 5029 // Allocation idiom 5030 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5031 instruction_count(1); force_serialization; 5032 fixed_latency(6); 5033 heap_ptr : S3(read); 5034 DECODE : S0(3); 5035 D0 : S2; 5036 MEM : S3; 5037 ALU : S3(2); 5038 dst : S5(write); 5039 BR : S5; 5040 %} 5041 5042 // Generic big/slow expanded idiom 5043 pipe_class pipe_slow( ) %{ 5044 instruction_count(10); multiple_bundles; force_serialization; 5045 fixed_latency(100); 5046 D0 : S0(2); 5047 MEM : S3(2); 5048 %} 5049 5050 // The real do-nothing guy 5051 pipe_class empty( ) %{ 5052 instruction_count(0); 5053 %} 5054 5055 // Define the class for the Nop node 5056 define %{ 5057 MachNop = empty; 5058 %} 5059 5060 %} 5061 5062 //----------INSTRUCTIONS------------------------------------------------------- 5063 // 5064 // match -- States which machine-independent subtree may be replaced 5065 // by this instruction. 5066 // ins_cost -- The estimated cost of this instruction is used by instruction 5067 // selection to identify a minimum cost tree of machine 5068 // instructions that matches a tree of machine-independent 5069 // instructions. 5070 // format -- A string providing the disassembly for this instruction. 5071 // The value of an instruction's operand may be inserted 5072 // by referring to it with a '$' prefix. 5073 // opcode -- Three instruction opcodes may be provided. These are referred 5074 // to within an encode class as $primary, $secondary, and $tertiary 5075 // respectively. 
The primary opcode is commonly used to 5076 // indicate the type of machine instruction, while secondary 5077 // and tertiary are often used for prefix options or addressing 5078 // modes. 5079 // ins_encode -- A list of encode classes with parameters. The encode class 5080 // name must have been defined in an 'enc_class' specification 5081 // in the encode section of the architecture description. 5082 5083 //----------BSWAP-Instruction-------------------------------------------------- 5084 instruct bytes_reverse_int(rRegI dst) %{ 5085 match(Set dst (ReverseBytesI dst)); 5086 5087 format %{ "BSWAP $dst" %} 5088 opcode(0x0F, 0xC8); 5089 ins_encode( OpcP, OpcSReg(dst) ); 5090 ins_pipe( ialu_reg ); 5091 %} 5092 5093 instruct bytes_reverse_long(eRegL dst) %{ 5094 match(Set dst (ReverseBytesL dst)); 5095 5096 format %{ "BSWAP $dst.lo\n\t" 5097 "BSWAP $dst.hi\n\t" 5098 "XCHG $dst.lo $dst.hi" %} 5099 5100 ins_cost(125); 5101 ins_encode( bswap_long_bytes(dst) ); 5102 ins_pipe( ialu_reg_reg); 5103 %} 5104 5105 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5106 match(Set dst (ReverseBytesUS dst)); 5107 effect(KILL cr); 5108 5109 format %{ "BSWAP $dst\n\t" 5110 "SHR $dst,16\n\t" %} 5111 ins_encode %{ 5112 __ bswapl($dst$$Register); 5113 __ shrl($dst$$Register, 16); 5114 %} 5115 ins_pipe( ialu_reg ); 5116 %} 5117 5118 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5119 match(Set dst (ReverseBytesS dst)); 5120 effect(KILL cr); 5121 5122 format %{ "BSWAP $dst\n\t" 5123 "SAR $dst,16\n\t" %} 5124 ins_encode %{ 5125 __ bswapl($dst$$Register); 5126 __ sarl($dst$$Register, 16); 5127 %} 5128 ins_pipe( ialu_reg ); 5129 %} 5130 5131 5132 //---------- Zeros Count Instructions ------------------------------------------ 5133 5134 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5135 predicate(UseCountLeadingZerosInstruction); 5136 match(Set dst (CountLeadingZerosI src)); 5137 effect(KILL cr); 5138 5139 format %{ "LZCNT $dst, $src\t# count 
leading zeros (int)" %} 5140 ins_encode %{ 5141 __ lzcntl($dst$$Register, $src$$Register); 5142 %} 5143 ins_pipe(ialu_reg); 5144 %} 5145 5146 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5147 predicate(!UseCountLeadingZerosInstruction); 5148 match(Set dst (CountLeadingZerosI src)); 5149 effect(KILL cr); 5150 5151 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5152 "JNZ skip\n\t" 5153 "MOV $dst, -1\n" 5154 "skip:\n\t" 5155 "NEG $dst\n\t" 5156 "ADD $dst, 31" %} 5157 ins_encode %{ 5158 Register Rdst = $dst$$Register; 5159 Register Rsrc = $src$$Register; 5160 Label skip; 5161 __ bsrl(Rdst, Rsrc); 5162 __ jccb(Assembler::notZero, skip); 5163 __ movl(Rdst, -1); 5164 __ bind(skip); 5165 __ negl(Rdst); 5166 __ addl(Rdst, BitsPerInt - 1); 5167 %} 5168 ins_pipe(ialu_reg); 5169 %} 5170 5171 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5172 predicate(UseCountLeadingZerosInstruction); 5173 match(Set dst (CountLeadingZerosL src)); 5174 effect(TEMP dst, KILL cr); 5175 5176 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5177 "JNC done\n\t" 5178 "LZCNT $dst, $src.lo\n\t" 5179 "ADD $dst, 32\n" 5180 "done:" %} 5181 ins_encode %{ 5182 Register Rdst = $dst$$Register; 5183 Register Rsrc = $src$$Register; 5184 Label done; 5185 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5186 __ jccb(Assembler::carryClear, done); 5187 __ lzcntl(Rdst, Rsrc); 5188 __ addl(Rdst, BitsPerInt); 5189 __ bind(done); 5190 %} 5191 ins_pipe(ialu_reg); 5192 %} 5193 5194 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5195 predicate(!UseCountLeadingZerosInstruction); 5196 match(Set dst (CountLeadingZerosL src)); 5197 effect(TEMP dst, KILL cr); 5198 5199 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5200 "JZ msw_is_zero\n\t" 5201 "ADD $dst, 32\n\t" 5202 "JMP not_zero\n" 5203 "msw_is_zero:\n\t" 5204 "BSR $dst, $src.lo\n\t" 5205 "JNZ not_zero\n\t" 5206 "MOV $dst, -1\n" 5207 "not_zero:\n\t" 5208 "NEG 
$dst\n\t" 5209 "ADD $dst, 63\n" %} 5210 ins_encode %{ 5211 Register Rdst = $dst$$Register; 5212 Register Rsrc = $src$$Register; 5213 Label msw_is_zero; 5214 Label not_zero; 5215 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5216 __ jccb(Assembler::zero, msw_is_zero); 5217 __ addl(Rdst, BitsPerInt); 5218 __ jmpb(not_zero); 5219 __ bind(msw_is_zero); 5220 __ bsrl(Rdst, Rsrc); 5221 __ jccb(Assembler::notZero, not_zero); 5222 __ movl(Rdst, -1); 5223 __ bind(not_zero); 5224 __ negl(Rdst); 5225 __ addl(Rdst, BitsPerLong - 1); 5226 %} 5227 ins_pipe(ialu_reg); 5228 %} 5229 5230 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5231 predicate(UseCountTrailingZerosInstruction); 5232 match(Set dst (CountTrailingZerosI src)); 5233 effect(KILL cr); 5234 5235 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5236 ins_encode %{ 5237 __ tzcntl($dst$$Register, $src$$Register); 5238 %} 5239 ins_pipe(ialu_reg); 5240 %} 5241 5242 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5243 predicate(!UseCountTrailingZerosInstruction); 5244 match(Set dst (CountTrailingZerosI src)); 5245 effect(KILL cr); 5246 5247 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5248 "JNZ done\n\t" 5249 "MOV $dst, 32\n" 5250 "done:" %} 5251 ins_encode %{ 5252 Register Rdst = $dst$$Register; 5253 Label done; 5254 __ bsfl(Rdst, $src$$Register); 5255 __ jccb(Assembler::notZero, done); 5256 __ movl(Rdst, BitsPerInt); 5257 __ bind(done); 5258 %} 5259 ins_pipe(ialu_reg); 5260 %} 5261 5262 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5263 predicate(UseCountTrailingZerosInstruction); 5264 match(Set dst (CountTrailingZerosL src)); 5265 effect(TEMP dst, KILL cr); 5266 5267 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5268 "JNC done\n\t" 5269 "TZCNT $dst, $src.hi\n\t" 5270 "ADD $dst, 32\n" 5271 "done:" %} 5272 ins_encode %{ 5273 Register Rdst = $dst$$Register; 5274 Register Rsrc = $src$$Register; 5275 Label done; 5276 __ 
tzcntl(Rdst, Rsrc); 5277 __ jccb(Assembler::carryClear, done); 5278 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5279 __ addl(Rdst, BitsPerInt); 5280 __ bind(done); 5281 %} 5282 ins_pipe(ialu_reg); 5283 %} 5284 5285 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5286 predicate(!UseCountTrailingZerosInstruction); 5287 match(Set dst (CountTrailingZerosL src)); 5288 effect(TEMP dst, KILL cr); 5289 5290 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5291 "JNZ done\n\t" 5292 "BSF $dst, $src.hi\n\t" 5293 "JNZ msw_not_zero\n\t" 5294 "MOV $dst, 32\n" 5295 "msw_not_zero:\n\t" 5296 "ADD $dst, 32\n" 5297 "done:" %} 5298 ins_encode %{ 5299 Register Rdst = $dst$$Register; 5300 Register Rsrc = $src$$Register; 5301 Label msw_not_zero; 5302 Label done; 5303 __ bsfl(Rdst, Rsrc); 5304 __ jccb(Assembler::notZero, done); 5305 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5306 __ jccb(Assembler::notZero, msw_not_zero); 5307 __ movl(Rdst, BitsPerInt); 5308 __ bind(msw_not_zero); 5309 __ addl(Rdst, BitsPerInt); 5310 __ bind(done); 5311 %} 5312 ins_pipe(ialu_reg); 5313 %} 5314 5315 5316 //---------- Population Count Instructions ------------------------------------- 5317 5318 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5319 predicate(UsePopCountInstruction); 5320 match(Set dst (PopCountI src)); 5321 effect(KILL cr); 5322 5323 format %{ "POPCNT $dst, $src" %} 5324 ins_encode %{ 5325 __ popcntl($dst$$Register, $src$$Register); 5326 %} 5327 ins_pipe(ialu_reg); 5328 %} 5329 5330 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5331 predicate(UsePopCountInstruction); 5332 match(Set dst (PopCountI (LoadI mem))); 5333 effect(KILL cr); 5334 5335 format %{ "POPCNT $dst, $mem" %} 5336 ins_encode %{ 5337 __ popcntl($dst$$Register, $mem$$Address); 5338 %} 5339 ins_pipe(ialu_reg); 5340 %} 5341 5342 // Note: Long.bitCount(long) returns an int. 
// Population count of a 64-bit long on 32-bit x86: POPCNT each 32-bit
// half separately and add the two partial counts.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);  // POPCNT writes EFLAGS; tmp holds the high-half count

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Build explicit addresses for the low word and the high word (disp + 4)
    // of the in-memory long, then POPCNT each half and sum.
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);  // SAR clobbers EFLAGS

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);  // XOR clobbers EFLAGS

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));  // high word of an unsigned extension is zero
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 bits of the mask are relevant after the zero-extending byte load.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// The (x << 24) >> 24 idiom is recognized as a byte sign-extension, so a
// single sign-extending byte load replaces load+shifts.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    // Masking with 0xFF makes a zero-extending byte load sufficient.
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 bits of the mask are relevant after the zero-extending word load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);  // replicate the sign bit into the whole high word
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    // A 31-bit mask guarantees a non-negative result, so zeroing the
    // high word is a correct extension.
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());  // non-atomic two-word load
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two separate 32-bit loads: low word at disp, high word at disp + 4.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());  // x87 path when SSE2 is unavailable
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load via SSE2: one 64-bit XMM load, then spill to a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load via SSE2 straight into an int register pair:
// 64-bit XMM load, extract low word, shift, extract high word.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);  // bring the high word down to bits 0..31
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);  // x87 double load when SSE2 is unavailable
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Variant used when the upper half of the XMM register should be preserved
// (see UseXmmLoadAndClearUpper); instruction selection differs inside movdbl.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);  // x87 float load when SSE is unavailable
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Effective Address (32-bit offset)
instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Effective Address (index + offset)
instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Effective Address (scaled index)
instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Effective Address (scaled index + offset)
instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);  // XOR idiom for zero clobbers EFLAGS

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

// Load pointer constant
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant: one 32-bit immediate move per half.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load long constant zero: XOR each half.
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();  // x87 has a dedicated load-zero instruction
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();  // x87 has a dedicated load-one instruction
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
6035 instruct loadConF(regF dst, immF con) %{ 6036 match(Set dst con); 6037 ins_cost(125); 6038 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 6039 ins_encode %{ 6040 __ movflt($dst$$XMMRegister, $constantaddress($con)); 6041 %} 6042 ins_pipe(pipe_slow); 6043 %} 6044 6045 // The instruction usage is guarded by predicate in operand immF0(). 6046 instruct loadConF0(regF dst, immF0 src) %{ 6047 match(Set dst src); 6048 ins_cost(100); 6049 format %{ "XORPS $dst,$dst\t# float 0.0" %} 6050 ins_encode %{ 6051 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 6052 %} 6053 ins_pipe(pipe_slow); 6054 %} 6055 6056 // The instruction usage is guarded by predicate in operand immDPR(). 6057 instruct loadConDPR(regDPR dst, immDPR con) %{ 6058 match(Set dst con); 6059 ins_cost(125); 6060 6061 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 6062 "FSTP $dst" %} 6063 ins_encode %{ 6064 __ fld_d($constantaddress($con)); 6065 __ fstp_d($dst$$reg); 6066 %} 6067 ins_pipe(fpu_reg_con); 6068 %} 6069 6070 // The instruction usage is guarded by predicate in operand immDPR0(). 6071 instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 6072 match(Set dst con); 6073 ins_cost(125); 6074 6075 format %{ "FLDZ ST\n\t" 6076 "FSTP $dst" %} 6077 ins_encode %{ 6078 __ fldz(); 6079 __ fstp_d($dst$$reg); 6080 %} 6081 ins_pipe(fpu_reg_con); 6082 %} 6083 6084 // The instruction usage is guarded by predicate in operand immDPR1(). 6085 instruct loadConDPR1(regDPR dst, immDPR1 con) %{ 6086 match(Set dst con); 6087 ins_cost(125); 6088 6089 format %{ "FLD1 ST\n\t" 6090 "FSTP $dst" %} 6091 ins_encode %{ 6092 __ fld1(); 6093 __ fstp_d($dst$$reg); 6094 %} 6095 ins_pipe(fpu_reg_con); 6096 %} 6097 6098 // The instruction usage is guarded by predicate in operand immD(). 
6099 instruct loadConD(regD dst, immD con) %{ 6100 match(Set dst con); 6101 ins_cost(125); 6102 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 6103 ins_encode %{ 6104 __ movdbl($dst$$XMMRegister, $constantaddress($con)); 6105 %} 6106 ins_pipe(pipe_slow); 6107 %} 6108 6109 // The instruction usage is guarded by predicate in operand immD0(). 6110 instruct loadConD0(regD dst, immD0 src) %{ 6111 match(Set dst src); 6112 ins_cost(100); 6113 format %{ "XORPD $dst,$dst\t# double 0.0" %} 6114 ins_encode %{ 6115 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); 6116 %} 6117 ins_pipe( pipe_slow ); 6118 %} 6119 6120 // Load Stack Slot 6121 instruct loadSSI(rRegI dst, stackSlotI src) %{ 6122 match(Set dst src); 6123 ins_cost(125); 6124 6125 format %{ "MOV $dst,$src" %} 6126 opcode(0x8B); 6127 ins_encode( OpcP, RegMem(dst,src)); 6128 ins_pipe( ialu_reg_mem ); 6129 %} 6130 6131 instruct loadSSL(eRegL dst, stackSlotL src) %{ 6132 match(Set dst src); 6133 6134 ins_cost(200); 6135 format %{ "MOV $dst,$src.lo\n\t" 6136 "MOV $dst+4,$src.hi" %} 6137 opcode(0x8B, 0x8B); 6138 ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) ); 6139 ins_pipe( ialu_mem_long_reg ); 6140 %} 6141 6142 // Load Stack Slot 6143 instruct loadSSP(eRegP dst, stackSlotP src) %{ 6144 match(Set dst src); 6145 ins_cost(125); 6146 6147 format %{ "MOV $dst,$src" %} 6148 opcode(0x8B); 6149 ins_encode( OpcP, RegMem(dst,src)); 6150 ins_pipe( ialu_reg_mem ); 6151 %} 6152 6153 // Load Stack Slot 6154 instruct loadSSF(regFPR dst, stackSlotF src) %{ 6155 match(Set dst src); 6156 ins_cost(125); 6157 6158 format %{ "FLD_S $src\n\t" 6159 "FSTP $dst" %} 6160 opcode(0xD9); /* D9 /0, FLD m32real */ 6161 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6162 Pop_Reg_FPR(dst) ); 6163 ins_pipe( fpu_reg_mem ); 6164 %} 6165 6166 // Load Stack Slot 6167 instruct loadSSD(regDPR dst, stackSlotD src) %{ 6168 match(Set dst src); 6169 ins_cost(125); 6170 6171 format %{ "FLD_D $src\n\t" 6172 
"FSTP $dst" %} 6173 opcode(0xDD); /* DD /0, FLD m64real */ 6174 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6175 Pop_Reg_DPR(dst) ); 6176 ins_pipe( fpu_reg_mem ); 6177 %} 6178 6179 // Prefetch instructions for allocation. 6180 // Must be safe to execute with invalid address (cannot fault). 6181 6182 instruct prefetchAlloc0( memory mem ) %{ 6183 predicate(UseSSE==0 && AllocatePrefetchInstr!=3); 6184 match(PrefetchAllocation mem); 6185 ins_cost(0); 6186 size(0); 6187 format %{ "Prefetch allocation (non-SSE is empty encoding)" %} 6188 ins_encode(); 6189 ins_pipe(empty); 6190 %} 6191 6192 instruct prefetchAlloc( memory mem ) %{ 6193 predicate(AllocatePrefetchInstr==3); 6194 match( PrefetchAllocation mem ); 6195 ins_cost(100); 6196 6197 format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %} 6198 ins_encode %{ 6199 __ prefetchw($mem$$Address); 6200 %} 6201 ins_pipe(ialu_mem); 6202 %} 6203 6204 instruct prefetchAllocNTA( memory mem ) %{ 6205 predicate(UseSSE>=1 && AllocatePrefetchInstr==0); 6206 match(PrefetchAllocation mem); 6207 ins_cost(100); 6208 6209 format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %} 6210 ins_encode %{ 6211 __ prefetchnta($mem$$Address); 6212 %} 6213 ins_pipe(ialu_mem); 6214 %} 6215 6216 instruct prefetchAllocT0( memory mem ) %{ 6217 predicate(UseSSE>=1 && AllocatePrefetchInstr==1); 6218 match(PrefetchAllocation mem); 6219 ins_cost(100); 6220 6221 format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %} 6222 ins_encode %{ 6223 __ prefetcht0($mem$$Address); 6224 %} 6225 ins_pipe(ialu_mem); 6226 %} 6227 6228 instruct prefetchAllocT2( memory mem ) %{ 6229 predicate(UseSSE>=1 && AllocatePrefetchInstr==2); 6230 match(PrefetchAllocation mem); 6231 ins_cost(100); 6232 6233 format %{ "PREFETCHT2 $mem\t! 
Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long (two 32-bit moves, lo then hi; NOT atomic — the atomic variants below
// handle require_atomic_access()).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// Only the low 32 bits of the long are written — this is the ConvL2I truncation.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: stage the 64-bit value through an XMM temp so the store is one
// 64-bit MOVSD (atomic), with the same probing CMP for the implicit null check.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As above but the source is a register pair: pack lo/hi halves into one XMM
// register with PUNPCKLDQ before the single atomic 64-bit store.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87 path; source is constrained to the FPU top-of-stack regDPR1)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);       /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);       /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

// size(0) / empty encoding: no machine code is emitted — the node only
// constrains the compiler's instruction scheduling.
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// StoreLoad barrier: the only flavor that emits code on x86 (a locked ADD to
// the stack, via MacroAssembler::membar).
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// Zero-cost cast: src and dst are both constrained to EAX, so no code is needed.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// jmov* variants emulate CMOV with a short branch on CPUs without CMOV support.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long cmove: one CMOV per 32-bit half (lo then hi).
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of +1: single-byte INC (0x40 + register number).
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: leaves the source registers and flags untouched
// (note: no KILL cr), so the allocator need not overwrite src0.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add of -1: single-byte DEC (0x48 + register number).
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src"
%} 7101 opcode(0x03); 7102 ins_encode( OpcP, RegMem( dst, src) ); 7103 ins_pipe( ialu_reg_mem ); 7104 %} 7105 7106 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7107 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7108 effect(KILL cr); 7109 7110 ins_cost(150); 7111 format %{ "ADD $dst,$src" %} 7112 opcode(0x01); /* Opcode 01 /r */ 7113 ins_encode( OpcP, RegMem( src, dst ) ); 7114 ins_pipe( ialu_mem_reg ); 7115 %} 7116 7117 // Add Memory with Immediate 7118 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7119 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7120 effect(KILL cr); 7121 7122 ins_cost(125); 7123 format %{ "ADD $dst,$src" %} 7124 opcode(0x81); /* Opcode 81 /0 id */ 7125 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7126 ins_pipe( ialu_mem_imm ); 7127 %} 7128 7129 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7130 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7131 effect(KILL cr); 7132 7133 ins_cost(125); 7134 format %{ "INC $dst" %} 7135 opcode(0xFF); /* Opcode FF /0 */ 7136 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7137 ins_pipe( ialu_mem_imm ); 7138 %} 7139 7140 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7141 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7142 effect(KILL cr); 7143 7144 ins_cost(125); 7145 format %{ "DEC $dst" %} 7146 opcode(0xFF); /* Opcode FF /1 */ 7147 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7148 ins_pipe( ialu_mem_imm ); 7149 %} 7150 7151 7152 instruct checkCastPP( eRegP dst ) %{ 7153 match(Set dst (CheckCastPP dst)); 7154 7155 size(0); 7156 format %{ "#checkcastPP of $dst" %} 7157 ins_encode( /*empty encoding*/ ); 7158 ins_pipe( empty ); 7159 %} 7160 7161 instruct castPP( eRegP dst ) %{ 7162 match(Set dst (CastPP dst)); 7163 format %{ "#castPP of $dst" %} 7164 ins_encode( /*empty encoding*/ ); 7165 ins_pipe( empty ); 7166 %} 7167 7168 instruct castII( rRegI dst ) %{ 7169 match(Set dst (CastII dst)); 7170 format %{ "#castII of $dst" %} 
7171 ins_encode( /*empty encoding*/ ); 7172 ins_cost(0); 7173 ins_pipe( empty ); 7174 %} 7175 7176 7177 // Load-locked - same as a regular pointer load when used with compare-swap 7178 instruct loadPLocked(eRegP dst, memory mem) %{ 7179 match(Set dst (LoadPLocked mem)); 7180 7181 ins_cost(125); 7182 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7183 opcode(0x8B); 7184 ins_encode( OpcP, RegMem(dst,mem)); 7185 ins_pipe( ialu_reg_mem ); 7186 %} 7187 7188 // Conditional-store of the updated heap-top. 7189 // Used during allocation of the shared heap. 7190 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7191 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7192 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7193 // EAX is killed if there is contention, but then it's also unused. 7194 // In the common case of no contention, EAX holds the new oop address. 7195 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7196 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7197 ins_pipe( pipe_cmpxchg ); 7198 %} 7199 7200 // Conditional-store of an int value. 7201 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 7202 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ 7203 match(Set cr (StoreIConditional mem (Binary oldval newval))); 7204 effect(KILL oldval); 7205 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} 7206 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); 7207 ins_pipe( pipe_cmpxchg ); 7208 %} 7209 7210 // Conditional-store of a long value. 7211 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
        %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high order word of the new value to store but
    //       our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS: EDX:EAX holds the expected value, ECX:EBX the new value;
// res receives 1 on success, 0 on failure.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS; EAX holds the expected value.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int CAS; EAX holds the expected value.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Atomic add whose fetched result is unused: a plain locked ADD suffices.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add via locked XADD.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange. No explicit lock() here (contrast with xaddI above):
// XCHG with a memory operand asserts the bus lock implicitly on x86.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic pointer exchange; same implicit-lock note as xchgI.
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract immediate from register.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05);  /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract a memory operand from a register.
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write: subtract a register from a memory word.
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negation (0 - dst) via NEG.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03);  // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low half (EAX) of the EDX:EAX long pair only.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
//  (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Only applies when the long constant operand actually fits in 32 bits.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Only applies when the long constant operand actually fits in 32 bits.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned 32x32->64 multiply of two ints zero-extended to long.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// The min_jint / -1 case would trap with IDIV, so it is special-cased up front
// (quotient = min_jint, remainder = 0).
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4);  /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4);  /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: matches (src1 ^ -1) & src2, i.e. dst = ~src1 & src2.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// ANDN with a memory operand for the second source.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: matches (0 - src) & src, i.e. isolate the lowest set bit of src.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// BLSI with a memory source.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: matches (src - 1) ^ src, i.e. mask up to and including the lowest set bit.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// BLSMSK with a memory source.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: matches (src - 1) & src, i.e. reset (clear) the lowest set bit.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// BLSR with a memory source.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst
(OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer reinterpreted as int (CastP2X).
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register (read-modify-write on the same address)
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// These three have no match rule: they are emitted only through the
// expand blocks of the rolI_* match instructions below.
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate: the two shift counts must sum to 0 mod 32 for this OR of
// shifts to be a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
// As with ROL, these have no match rule and are used only via expand.
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Predicate: shift counts must sum to 0 mod 32 (see rolI_eReg_i8).
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// x ^ -1 == ~x, so emit NOT; note NOT does not touch EFLAGS (no KILL cr).
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register (read-modify-write on the same address)
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI
dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Helper: plain register copy; used only by the convI2B expansion below.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Helper: with dst == src on entry, NEG sets CF iff src != 0, and
// ADC dst,src computes (-src + src + CF), leaving the boolean 0/1 in dst.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Convert int to boolean (0 or 1).
instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Helper: pointer-to-int-register copy; used only by convP2B below.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Helper: same NEG/ADC trick as ci2b, for a pointer source.
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Convert pointer to boolean (0 or 1).
instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// dst = (p < q) ? -1 : 0, computed branch-free via SETlt then NEG.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Note: an unused 'Label done' was removed here; the sequence is
    // straight-line and never branches.
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// dst = (dst < 0) ? -1 : 0 — arithmetic shift replicates the sign bit.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// p = (p - q) + (p < q ? y : 0), using a short branch instead of a mask.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// y = (p < q) ? y : 0 — keep y when p < q, otherwise clear it.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------

// Performs the add for real (op1 is USE_KILL'd) so cr holds the
// resulting flags for the overflow check.
instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// CMP computes op1-op2 flags without modifying op1, so no KILL needed.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow check for 0 - op2: NEG destroys op2, hence USE_KILL.
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Two-operand IMUL destroys op1; flags in cr carry the overflow result.
instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Three-operand IMUL writes a TEMP, keeping op1 live for later use.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
// 64-bit add on 32-bit x86: ADD low halves, then ADC high halves.
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
// 64-bit subtract: SUB low halves, then SBB (subtract-with-borrow) high halves.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// 64-bit negate (0 - dst), via the neg_long encoding.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
// Bitwise op: the halves are independent, no carry chaining needed.
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long ANDN: applied independently to low and high halves.
// TEMP dst prevents dst aliasing the sources across the two-instruction pair.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // High word of the operand lives at the same address + 4.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSI (isolate lowest set bit): if the low half has any set bit
// (BLSIL leaves ZF clear), the high result is 0; otherwise apply BLSI
// to the high half.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSMSK (mask up to lowest set bit): the carry out of the low
// half (CF) tells whether the mask continues into the high half.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Long BLSR (clear lowest set bit): high half is copied unchanged unless
// the low half was zero (CF set), in which case BLSR is applied to it.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// x ^ -1 == ~x: NOT each half; NOT does not touch EFLAGS (no KILL cr).
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// One 64-bit left shift == ADD/ADC of the value with itself.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2 (two ADD/ADC doublings)
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3 (three ADD/ADC doublings)
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
// SHLD shifts bits from the low word into the high word, then SHL the low.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// Low word moves wholesale into the high word; low word becomes zero.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
// Runtime test of bit 5 of the count selects the >=32 path.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// Arithmetic: the high word is replaced by 32 copies of the sign bit.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}


//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
"SAHF\n" 9258 "exit:\tNOP // avoid branch to branch" %} 9259 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9260 ins_encode( Push_Reg_DPR(src1), 9261 OpcP, RegOpc(src2), 9262 cmpF_P6_fixup ); 9263 ins_pipe( pipe_slow ); 9264 %} 9265 9266 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9267 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9268 match(Set cr (CmpD src1 src2)); 9269 ins_cost(150); 9270 format %{ "FLD $src1\n\t" 9271 "FUCOMIP ST,$src2 // P6 instruction" %} 9272 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9273 ins_encode( Push_Reg_DPR(src1), 9274 OpcP, RegOpc(src2)); 9275 ins_pipe( pipe_slow ); 9276 %} 9277 9278 // Compare & branch 9279 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9280 predicate(UseSSE<=1); 9281 match(Set cr (CmpD src1 src2)); 9282 effect(KILL rax); 9283 ins_cost(200); 9284 format %{ "FLD $src1\n\t" 9285 "FCOMp $src2\n\t" 9286 "FNSTSW AX\n\t" 9287 "TEST AX,0x400\n\t" 9288 "JZ,s flags\n\t" 9289 "MOV AH,1\t# unordered treat as LT\n" 9290 "flags:\tSAHF" %} 9291 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9292 ins_encode( Push_Reg_DPR(src1), 9293 OpcP, RegOpc(src2), 9294 fpu_flags); 9295 ins_pipe( pipe_slow ); 9296 %} 9297 9298 // Compare vs zero into -1,0,1 9299 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9300 predicate(UseSSE<=1); 9301 match(Set dst (CmpD3 src1 zero)); 9302 effect(KILL cr, KILL rax); 9303 ins_cost(280); 9304 format %{ "FTSTD $dst,$src1" %} 9305 opcode(0xE4, 0xD9); 9306 ins_encode( Push_Reg_DPR(src1), 9307 OpcS, OpcP, PopFPU, 9308 CmpF_Result(dst)); 9309 ins_pipe( pipe_slow ); 9310 %} 9311 9312 // Compare into -1,0,1 9313 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9314 predicate(UseSSE<=1); 9315 match(Set dst (CmpD3 src1 src2)); 9316 effect(KILL cr, KILL rax); 9317 ins_cost(300); 9318 format %{ "FCMPD $dst,$src1,$src2" %} 9319 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9320 ins_encode( 
Push_Reg_DPR(src1), 9321 OpcP, RegOpc(src2), 9322 CmpF_Result(dst)); 9323 ins_pipe( pipe_slow ); 9324 %} 9325 9326 // float compare and set condition codes in EFLAGS by XMM regs 9327 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9328 predicate(UseSSE>=2); 9329 match(Set cr (CmpD src1 src2)); 9330 ins_cost(145); 9331 format %{ "UCOMISD $src1,$src2\n\t" 9332 "JNP,s exit\n\t" 9333 "PUSHF\t# saw NaN, set CF\n\t" 9334 "AND [rsp], #0xffffff2b\n\t" 9335 "POPF\n" 9336 "exit:" %} 9337 ins_encode %{ 9338 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9339 emit_cmpfp_fixup(_masm); 9340 %} 9341 ins_pipe( pipe_slow ); 9342 %} 9343 9344 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9345 predicate(UseSSE>=2); 9346 match(Set cr (CmpD src1 src2)); 9347 ins_cost(100); 9348 format %{ "UCOMISD $src1,$src2" %} 9349 ins_encode %{ 9350 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9351 %} 9352 ins_pipe( pipe_slow ); 9353 %} 9354 9355 // float compare and set condition codes in EFLAGS by XMM regs 9356 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9357 predicate(UseSSE>=2); 9358 match(Set cr (CmpD src1 (LoadD src2))); 9359 ins_cost(145); 9360 format %{ "UCOMISD $src1,$src2\n\t" 9361 "JNP,s exit\n\t" 9362 "PUSHF\t# saw NaN, set CF\n\t" 9363 "AND [rsp], #0xffffff2b\n\t" 9364 "POPF\n" 9365 "exit:" %} 9366 ins_encode %{ 9367 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9368 emit_cmpfp_fixup(_masm); 9369 %} 9370 ins_pipe( pipe_slow ); 9371 %} 9372 9373 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9374 predicate(UseSSE>=2); 9375 match(Set cr (CmpD src1 (LoadD src2))); 9376 ins_cost(100); 9377 format %{ "UCOMISD $src1,$src2" %} 9378 ins_encode %{ 9379 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9380 %} 9381 ins_pipe( pipe_slow ); 9382 %} 9383 9384 // Compare into -1,0,1 in XMM 9385 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9386 predicate(UseSSE>=2); 9387 match(Set dst (CmpD3 src1 src2)); 
9388 effect(KILL cr); 9389 ins_cost(255); 9390 format %{ "UCOMISD $src1, $src2\n\t" 9391 "MOV $dst, #-1\n\t" 9392 "JP,s done\n\t" 9393 "JB,s done\n\t" 9394 "SETNE $dst\n\t" 9395 "MOVZB $dst, $dst\n" 9396 "done:" %} 9397 ins_encode %{ 9398 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9399 emit_cmpfp3(_masm, $dst$$Register); 9400 %} 9401 ins_pipe( pipe_slow ); 9402 %} 9403 9404 // Compare into -1,0,1 in XMM and memory 9405 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9406 predicate(UseSSE>=2); 9407 match(Set dst (CmpD3 src1 (LoadD src2))); 9408 effect(KILL cr); 9409 ins_cost(275); 9410 format %{ "UCOMISD $src1, $src2\n\t" 9411 "MOV $dst, #-1\n\t" 9412 "JP,s done\n\t" 9413 "JB,s done\n\t" 9414 "SETNE $dst\n\t" 9415 "MOVZB $dst, $dst\n" 9416 "done:" %} 9417 ins_encode %{ 9418 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9419 emit_cmpfp3(_masm, $dst$$Register); 9420 %} 9421 ins_pipe( pipe_slow ); 9422 %} 9423 9424 9425 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9426 predicate (UseSSE <=1); 9427 match(Set dst (SubD dst src)); 9428 9429 format %{ "FLD $src\n\t" 9430 "DSUBp $dst,ST" %} 9431 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9432 ins_cost(150); 9433 ins_encode( Push_Reg_DPR(src), 9434 OpcP, RegOpc(dst) ); 9435 ins_pipe( fpu_reg_reg ); 9436 %} 9437 9438 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9439 predicate (UseSSE <=1); 9440 match(Set dst (RoundDouble (SubD src1 src2))); 9441 ins_cost(250); 9442 9443 format %{ "FLD $src2\n\t" 9444 "DSUB ST,$src1\n\t" 9445 "FSTP_D $dst\t# D-round" %} 9446 opcode(0xD8, 0x5); 9447 ins_encode( Push_Reg_DPR(src2), 9448 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9449 ins_pipe( fpu_mem_reg_reg ); 9450 %} 9451 9452 9453 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9454 predicate (UseSSE <=1); 9455 match(Set dst (SubD dst (LoadD src))); 9456 ins_cost(150); 9457 9458 format %{ "FLD $src\n\t" 9459 "DSUBp $dst,ST" %} 9460 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9461 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9462 OpcP, RegOpc(dst) ); 9463 ins_pipe( fpu_reg_mem ); 9464 %} 9465 9466 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9467 predicate (UseSSE<=1); 9468 match(Set dst (AbsD src)); 9469 ins_cost(100); 9470 format %{ "FABS" %} 9471 opcode(0xE1, 0xD9); 9472 ins_encode( OpcS, OpcP ); 9473 ins_pipe( fpu_reg_reg ); 9474 %} 9475 9476 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9477 predicate(UseSSE<=1); 9478 match(Set dst (NegD src)); 9479 ins_cost(100); 9480 format %{ "FCHS" %} 9481 opcode(0xE0, 0xD9); 9482 ins_encode( OpcS, OpcP ); 9483 ins_pipe( fpu_reg_reg ); 9484 %} 9485 9486 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9487 predicate(UseSSE<=1); 9488 match(Set dst (AddD dst src)); 9489 format %{ "FLD $src\n\t" 9490 "DADD $dst,ST" %} 9491 size(4); 9492 ins_cost(150); 9493 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9494 ins_encode( Push_Reg_DPR(src), 9495 OpcP, RegOpc(dst) ); 9496 ins_pipe( fpu_reg_reg ); 9497 %} 9498 9499 9500 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9501 predicate(UseSSE<=1); 9502 match(Set dst (RoundDouble (AddD src1 src2))); 9503 ins_cost(250); 9504 9505 format %{ "FLD $src2\n\t" 9506 "DADD ST,$src1\n\t" 9507 "FSTP_D $dst\t# D-round" %} 9508 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9509 ins_encode( Push_Reg_DPR(src2), 9510 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9511 ins_pipe( fpu_mem_reg_reg ); 9512 %} 9513 9514 9515 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9516 predicate(UseSSE<=1); 9517 match(Set dst (AddD dst (LoadD src))); 9518 ins_cost(150); 9519 9520 format %{ "FLD $src\n\t" 9521 "DADDp $dst,ST" %} 9522 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9523 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9524 OpcP, RegOpc(dst) ); 9525 ins_pipe( fpu_reg_mem ); 9526 %} 9527 9528 // add-to-memory 9529 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9530 predicate(UseSSE<=1); 9531 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9532 ins_cost(150); 9533 9534 format %{ "FLD_D $dst\n\t" 9535 "DADD ST,$src\n\t" 9536 "FST_D $dst" %} 9537 opcode(0xDD, 0x0); 9538 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9539 Opcode(0xD8), RegOpc(src), 9540 set_instruction_start, 9541 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9542 ins_pipe( fpu_reg_mem ); 9543 %} 9544 9545 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9546 predicate(UseSSE<=1); 9547 match(Set dst (AddD dst con)); 9548 ins_cost(125); 9549 format %{ "FLD1\n\t" 9550 "DADDp $dst,ST" %} 9551 ins_encode %{ 9552 __ fld1(); 9553 __ faddp($dst$$reg); 9554 %} 9555 ins_pipe(fpu_reg); 9556 %} 9557 9558 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9559 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9560 match(Set dst (AddD dst con)); 9561 ins_cost(200); 9562 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9563 "DADDp $dst,ST" %} 9564 ins_encode %{ 9565 __ fld_d($constantaddress($con)); 9566 __ faddp($dst$$reg); 9567 %} 9568 ins_pipe(fpu_reg_mem); 9569 %} 9570 9571 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9572 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9573 match(Set dst (RoundDouble (AddD src con))); 9574 ins_cost(200); 9575 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9576 "DADD ST,$src\n\t" 9577 "FSTP_D $dst\t# D-round" %} 9578 ins_encode %{ 9579 __ fld_d($constantaddress($con)); 9580 __ fadd($src$$reg); 9581 __ fstp_d(Address(rsp, $dst$$disp)); 9582 %} 9583 ins_pipe(fpu_mem_reg_con); 9584 %} 9585 9586 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9587 predicate(UseSSE<=1); 9588 match(Set dst (MulD dst src)); 9589 format %{ "FLD $src\n\t" 9590 "DMULp $dst,ST" %} 9591 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9592 ins_cost(150); 9593 ins_encode( Push_Reg_DPR(src), 9594 OpcP, RegOpc(dst) ); 9595 ins_pipe( 
fpu_reg_reg ); 9596 %} 9597 9598 // Strict FP instruction biases argument before multiply then 9599 // biases result to avoid double rounding of subnormals. 9600 // 9601 // scale arg1 by multiplying arg1 by 2^(-15360) 9602 // load arg2 9603 // multiply scaled arg1 by arg2 9604 // rescale product by 2^(15360) 9605 // 9606 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9607 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9608 match(Set dst (MulD dst src)); 9609 ins_cost(1); // Select this instruction for all strict FP double multiplies 9610 9611 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9612 "DMULp $dst,ST\n\t" 9613 "FLD $src\n\t" 9614 "DMULp $dst,ST\n\t" 9615 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9616 "DMULp $dst,ST\n\t" %} 9617 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9618 ins_encode( strictfp_bias1(dst), 9619 Push_Reg_DPR(src), 9620 OpcP, RegOpc(dst), 9621 strictfp_bias2(dst) ); 9622 ins_pipe( fpu_reg_reg ); 9623 %} 9624 9625 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9626 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9627 match(Set dst (MulD dst con)); 9628 ins_cost(200); 9629 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9630 "DMULp $dst,ST" %} 9631 ins_encode %{ 9632 __ fld_d($constantaddress($con)); 9633 __ fmulp($dst$$reg); 9634 %} 9635 ins_pipe(fpu_reg_mem); 9636 %} 9637 9638 9639 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9640 predicate( UseSSE<=1 ); 9641 match(Set dst (MulD dst (LoadD src))); 9642 ins_cost(200); 9643 format %{ "FLD_D $src\n\t" 9644 "DMULp $dst,ST" %} 9645 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9646 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9647 OpcP, RegOpc(dst) ); 9648 ins_pipe( fpu_reg_mem ); 9649 %} 9650 9651 // 9652 // Cisc-alternate to reg-reg multiply 9653 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9654 predicate( UseSSE<=1 ); 9655 match(Set dst (MulD src (LoadD mem))); 9656 ins_cost(250); 9657 format %{ "FLD_D $mem\n\t" 9658 "DMUL ST,$src\n\t" 9659 "FSTP_D $dst" %} 9660 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9661 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9662 OpcReg_FPR(src), 9663 Pop_Reg_DPR(dst) ); 9664 ins_pipe( fpu_reg_reg_mem ); 9665 %} 9666 9667 9668 // MACRO3 -- addDPR a mulDPR 9669 // This instruction is a '2-address' instruction in that the result goes 9670 // back to src2. This eliminates a move from the macro; possibly the 9671 // register allocator will have to add it back (and maybe not). 9672 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9673 predicate( UseSSE<=1 ); 9674 match(Set src2 (AddD (MulD src0 src1) src2)); 9675 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9676 "DMUL ST,$src1\n\t" 9677 "DADDp $src2,ST" %} 9678 ins_cost(250); 9679 opcode(0xDD); /* LoadD DD /0 */ 9680 ins_encode( Push_Reg_FPR(src0), 9681 FMul_ST_reg(src1), 9682 FAddP_reg_ST(src2) ); 9683 ins_pipe( fpu_reg_reg_reg ); 9684 %} 9685 9686 9687 // MACRO3 -- subDPR a mulDPR 9688 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9689 predicate( UseSSE<=1 ); 9690 match(Set src2 (SubD (MulD src0 src1) src2)); 9691 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9692 "DMUL ST,$src1\n\t" 9693 "DSUBRp $src2,ST" %} 9694 ins_cost(250); 9695 ins_encode( Push_Reg_FPR(src0), 9696 FMul_ST_reg(src1), 9697 Opcode(0xDE), Opc_plus(0xE0,src2)); 9698 ins_pipe( fpu_reg_reg_reg ); 9699 %} 9700 9701 9702 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9703 predicate( UseSSE<=1 ); 9704 match(Set dst (DivD dst src)); 9705 9706 format %{ "FLD $src\n\t" 9707 "FDIVp $dst,ST" %} 9708 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9709 ins_cost(150); 9710 ins_encode( Push_Reg_DPR(src), 9711 OpcP, RegOpc(dst) ); 9712 ins_pipe( fpu_reg_reg ); 9713 %} 9714 9715 // Strict FP instruction biases argument before division then 9716 // biases 
// result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// Strict FP double divide for the x87 stack (UseSSE<=1).
// NOTE(review): the original carried two predicate() clauses — a stray bare
// "predicate (UseSSE<=1);" followed by the full strict-mode test. An instruct
// takes a single predicate; the redundant clause is folded into the strict
// test below, matching the sibling strictfp_mulDPR_reg.
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all strict FP double divides

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Non-strict x87 double divide with an explicit round-to-double of the
// result, spilled to a stack slot (the FSTP_D performs the rounding).
instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD $src1\n\t"
            "FDIV ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Double modulus on the x87 stack (UseSSE<=1); helper encodings loop on
// FPREM until the reduction is complete.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Double modulus for SSE2 (UseSSE>=2): XMM operands are bounced through the
// stack to the x87 FPREM loop, result is moved back to an XMM register.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

// Tangent of a double already on the x87 stack (UseSSE<=1).
// Encoded as fptan (D9 F2) followed by fstp st (DD D8) to discard the
// constant 1.0 that fptan pushes.
instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst(TanD src));
  format %{ "DTAN $dst" %}
  ins_encode( Opcode(0xD9), Opcode(0xF2),   // fptan
              Opcode(0xDD), Opcode(0xD8));  // fstp st
  ins_pipe( pipe_slow );
%}

// Tangent for SSE2: XMM value is pushed to the x87 stack, fptan is run,
// and the result is popped back into the XMM register.
instruct tanD_reg(regD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(TanD dst));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DTAN $dst" %}
  ins_encode( Push_SrcD(dst),
              Opcode(0xD9), Opcode(0xF2),   // fptan
              Opcode(0xDD), Opcode(0xD8),   // fstp st
              Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// Arctangent (two-operand AtanD) on the x87 stack via fpatan (D9 F3).
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// Arctangent for SSE2: operands routed through the x87 stack for fpatan.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// Double square root via x87 fsqrt (D9 FA), UseSSE<=1.
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

// log10 of a double on the x87 stack (UseSSE<=1), computed as
// log_10(2) * log_2(x) with fldlg2/fyl2x.
instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (Log10D src));
  // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
  // fxch ; swap ST(0) with ST(1)
  // fyl2x ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FXCH \n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
              Opcode(0xD9), Opcode(0xC9),   // fxch
              Opcode(0xD9), Opcode(0xF1));  // fyl2x

  ins_pipe( pipe_slow );
%}

// log10 for SSE2: value is pushed to the x87 stack, fldlg2/fyl2x computes
// log_10(2) * log_2(x), result is popped back into the XMM register.
instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  match(Set dst (Log10D src));
  // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
  // fyl2x ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
              Push_SrcD(src),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              Push_ResultD(dst));

  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
//     fcompp();
//     fwait(); fnstsw_ax();
//     sahf();
//     movl(dst, unordered_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//   exit:

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
match(Set cr (CmpF src1 src2)); 9903 effect(KILL rax); 9904 ins_cost(150); 9905 format %{ "FLD $src1\n\t" 9906 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9907 "JNP exit\n\t" 9908 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 9909 "SAHF\n" 9910 "exit:\tNOP // avoid branch to branch" %} 9911 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9912 ins_encode( Push_Reg_DPR(src1), 9913 OpcP, RegOpc(src2), 9914 cmpF_P6_fixup ); 9915 ins_pipe( pipe_slow ); 9916 %} 9917 9918 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 9919 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9920 match(Set cr (CmpF src1 src2)); 9921 ins_cost(100); 9922 format %{ "FLD $src1\n\t" 9923 "FUCOMIP ST,$src2 // P6 instruction" %} 9924 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9925 ins_encode( Push_Reg_DPR(src1), 9926 OpcP, RegOpc(src2)); 9927 ins_pipe( pipe_slow ); 9928 %} 9929 9930 9931 // Compare & branch 9932 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9933 predicate(UseSSE == 0); 9934 match(Set cr (CmpF src1 src2)); 9935 effect(KILL rax); 9936 ins_cost(200); 9937 format %{ "FLD $src1\n\t" 9938 "FCOMp $src2\n\t" 9939 "FNSTSW AX\n\t" 9940 "TEST AX,0x400\n\t" 9941 "JZ,s flags\n\t" 9942 "MOV AH,1\t# unordered treat as LT\n" 9943 "flags:\tSAHF" %} 9944 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9945 ins_encode( Push_Reg_DPR(src1), 9946 OpcP, RegOpc(src2), 9947 fpu_flags); 9948 ins_pipe( pipe_slow ); 9949 %} 9950 9951 // Compare vs zero into -1,0,1 9952 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9953 predicate(UseSSE == 0); 9954 match(Set dst (CmpF3 src1 zero)); 9955 effect(KILL cr, KILL rax); 9956 ins_cost(280); 9957 format %{ "FTSTF $dst,$src1" %} 9958 opcode(0xE4, 0xD9); 9959 ins_encode( Push_Reg_DPR(src1), 9960 OpcS, OpcP, PopFPU, 9961 CmpF_Result(dst)); 9962 ins_pipe( pipe_slow ); 9963 %} 9964 9965 // Compare into -1,0,1 9966 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, 
eFlagsReg cr) %{ 9967 predicate(UseSSE == 0); 9968 match(Set dst (CmpF3 src1 src2)); 9969 effect(KILL cr, KILL rax); 9970 ins_cost(300); 9971 format %{ "FCMPF $dst,$src1,$src2" %} 9972 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9973 ins_encode( Push_Reg_DPR(src1), 9974 OpcP, RegOpc(src2), 9975 CmpF_Result(dst)); 9976 ins_pipe( pipe_slow ); 9977 %} 9978 9979 // float compare and set condition codes in EFLAGS by XMM regs 9980 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 9981 predicate(UseSSE>=1); 9982 match(Set cr (CmpF src1 src2)); 9983 ins_cost(145); 9984 format %{ "UCOMISS $src1,$src2\n\t" 9985 "JNP,s exit\n\t" 9986 "PUSHF\t# saw NaN, set CF\n\t" 9987 "AND [rsp], #0xffffff2b\n\t" 9988 "POPF\n" 9989 "exit:" %} 9990 ins_encode %{ 9991 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 9992 emit_cmpfp_fixup(_masm); 9993 %} 9994 ins_pipe( pipe_slow ); 9995 %} 9996 9997 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 9998 predicate(UseSSE>=1); 9999 match(Set cr (CmpF src1 src2)); 10000 ins_cost(100); 10001 format %{ "UCOMISS $src1,$src2" %} 10002 ins_encode %{ 10003 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10004 %} 10005 ins_pipe( pipe_slow ); 10006 %} 10007 10008 // float compare and set condition codes in EFLAGS by XMM regs 10009 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10010 predicate(UseSSE>=1); 10011 match(Set cr (CmpF src1 (LoadF src2))); 10012 ins_cost(165); 10013 format %{ "UCOMISS $src1,$src2\n\t" 10014 "JNP,s exit\n\t" 10015 "PUSHF\t# saw NaN, set CF\n\t" 10016 "AND [rsp], #0xffffff2b\n\t" 10017 "POPF\n" 10018 "exit:" %} 10019 ins_encode %{ 10020 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10021 emit_cmpfp_fixup(_masm); 10022 %} 10023 ins_pipe( pipe_slow ); 10024 %} 10025 10026 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10027 predicate(UseSSE>=1); 10028 match(Set cr (CmpF src1 (LoadF src2))); 10029 ins_cost(100); 10030 format %{ "UCOMISS $src1,$src2" %} 10031 ins_encode %{ 
10032 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10033 %} 10034 ins_pipe( pipe_slow ); 10035 %} 10036 10037 // Compare into -1,0,1 in XMM 10038 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10039 predicate(UseSSE>=1); 10040 match(Set dst (CmpF3 src1 src2)); 10041 effect(KILL cr); 10042 ins_cost(255); 10043 format %{ "UCOMISS $src1, $src2\n\t" 10044 "MOV $dst, #-1\n\t" 10045 "JP,s done\n\t" 10046 "JB,s done\n\t" 10047 "SETNE $dst\n\t" 10048 "MOVZB $dst, $dst\n" 10049 "done:" %} 10050 ins_encode %{ 10051 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10052 emit_cmpfp3(_masm, $dst$$Register); 10053 %} 10054 ins_pipe( pipe_slow ); 10055 %} 10056 10057 // Compare into -1,0,1 in XMM and memory 10058 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10059 predicate(UseSSE>=1); 10060 match(Set dst (CmpF3 src1 (LoadF src2))); 10061 effect(KILL cr); 10062 ins_cost(275); 10063 format %{ "UCOMISS $src1, $src2\n\t" 10064 "MOV $dst, #-1\n\t" 10065 "JP,s done\n\t" 10066 "JB,s done\n\t" 10067 "SETNE $dst\n\t" 10068 "MOVZB $dst, $dst\n" 10069 "done:" %} 10070 ins_encode %{ 10071 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10072 emit_cmpfp3(_masm, $dst$$Register); 10073 %} 10074 ins_pipe( pipe_slow ); 10075 %} 10076 10077 // Spill to obtain 24-bit precision 10078 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10079 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10080 match(Set dst (SubF src1 src2)); 10081 10082 format %{ "FSUB $dst,$src1 - $src2" %} 10083 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10084 ins_encode( Push_Reg_FPR(src1), 10085 OpcReg_FPR(src2), 10086 Pop_Mem_FPR(dst) ); 10087 ins_pipe( fpu_mem_reg_reg ); 10088 %} 10089 // 10090 // This instruction does not round to 24-bits 10091 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10092 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10093 match(Set dst (SubF dst src)); 10094 10095 
format %{ "FSUB $dst,$src" %} 10096 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10097 ins_encode( Push_Reg_FPR(src), 10098 OpcP, RegOpc(dst) ); 10099 ins_pipe( fpu_reg_reg ); 10100 %} 10101 10102 // Spill to obtain 24-bit precision 10103 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10104 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10105 match(Set dst (AddF src1 src2)); 10106 10107 format %{ "FADD $dst,$src1,$src2" %} 10108 opcode(0xD8, 0x0); /* D8 C0+i */ 10109 ins_encode( Push_Reg_FPR(src2), 10110 OpcReg_FPR(src1), 10111 Pop_Mem_FPR(dst) ); 10112 ins_pipe( fpu_mem_reg_reg ); 10113 %} 10114 // 10115 // This instruction does not round to 24-bits 10116 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10117 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10118 match(Set dst (AddF dst src)); 10119 10120 format %{ "FLD $src\n\t" 10121 "FADDp $dst,ST" %} 10122 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10123 ins_encode( Push_Reg_FPR(src), 10124 OpcP, RegOpc(dst) ); 10125 ins_pipe( fpu_reg_reg ); 10126 %} 10127 10128 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10129 predicate(UseSSE==0); 10130 match(Set dst (AbsF src)); 10131 ins_cost(100); 10132 format %{ "FABS" %} 10133 opcode(0xE1, 0xD9); 10134 ins_encode( OpcS, OpcP ); 10135 ins_pipe( fpu_reg_reg ); 10136 %} 10137 10138 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10139 predicate(UseSSE==0); 10140 match(Set dst (NegF src)); 10141 ins_cost(100); 10142 format %{ "FCHS" %} 10143 opcode(0xE0, 0xD9); 10144 ins_encode( OpcS, OpcP ); 10145 ins_pipe( fpu_reg_reg ); 10146 %} 10147 10148 // Cisc-alternate to addFPR_reg 10149 // Spill to obtain 24-bit precision 10150 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10151 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10152 match(Set dst (AddF src1 (LoadF src2))); 10153 10154 format %{ "FLD $src2\n\t" 10155 "FADD ST,$src1\n\t" 10156 "FSTP_S $dst" %} 10157 opcode(0xD8, 
0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10158 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10159 OpcReg_FPR(src1), 10160 Pop_Mem_FPR(dst) ); 10161 ins_pipe( fpu_mem_reg_mem ); 10162 %} 10163 // 10164 // Cisc-alternate to addFPR_reg 10165 // This instruction does not round to 24-bits 10166 instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10167 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10168 match(Set dst (AddF dst (LoadF src))); 10169 10170 format %{ "FADD $dst,$src" %} 10171 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10172 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10173 OpcP, RegOpc(dst) ); 10174 ins_pipe( fpu_reg_mem ); 10175 %} 10176 10177 // // Following two instructions for _222_mpegaudio 10178 // Spill to obtain 24-bit precision 10179 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10180 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10181 match(Set dst (AddF src1 src2)); 10182 10183 format %{ "FADD $dst,$src1,$src2" %} 10184 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10185 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10186 OpcReg_FPR(src2), 10187 Pop_Mem_FPR(dst) ); 10188 ins_pipe( fpu_mem_reg_mem ); 10189 %} 10190 10191 // Cisc-spill variant 10192 // Spill to obtain 24-bit precision 10193 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10194 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10195 match(Set dst (AddF src1 (LoadF src2))); 10196 10197 format %{ "FADD $dst,$src1,$src2 cisc" %} 10198 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10199 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10200 set_instruction_start, 10201 OpcP, RMopc_Mem(secondary,src1), 10202 Pop_Mem_FPR(dst) ); 10203 ins_pipe( fpu_mem_mem_mem ); 10204 %} 10205 10206 // Spill to obtain 24-bit precision 10207 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10208 
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10209 match(Set dst (AddF src1 src2)); 10210 10211 format %{ "FADD $dst,$src1,$src2" %} 10212 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10213 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10214 set_instruction_start, 10215 OpcP, RMopc_Mem(secondary,src1), 10216 Pop_Mem_FPR(dst) ); 10217 ins_pipe( fpu_mem_mem_mem ); 10218 %} 10219 10220 10221 // Spill to obtain 24-bit precision 10222 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10223 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10224 match(Set dst (AddF src con)); 10225 format %{ "FLD $src\n\t" 10226 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10227 "FSTP_S $dst" %} 10228 ins_encode %{ 10229 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10230 __ fadd_s($constantaddress($con)); 10231 __ fstp_s(Address(rsp, $dst$$disp)); 10232 %} 10233 ins_pipe(fpu_mem_reg_con); 10234 %} 10235 // 10236 // This instruction does not round to 24-bits 10237 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10238 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10239 match(Set dst (AddF src con)); 10240 format %{ "FLD $src\n\t" 10241 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10242 "FSTP $dst" %} 10243 ins_encode %{ 10244 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10245 __ fadd_s($constantaddress($con)); 10246 __ fstp_d($dst$$reg); 10247 %} 10248 ins_pipe(fpu_reg_reg_con); 10249 %} 10250 10251 // Spill to obtain 24-bit precision 10252 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10253 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10254 match(Set dst (MulF src1 src2)); 10255 10256 format %{ "FLD $src1\n\t" 10257 "FMUL $src2\n\t" 10258 "FSTP_S $dst" %} 10259 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10260 ins_encode( Push_Reg_FPR(src1), 10261 OpcReg_FPR(src2), 10262 
Pop_Mem_FPR(dst) ); 10263 ins_pipe( fpu_mem_reg_reg ); 10264 %} 10265 // 10266 // This instruction does not round to 24-bits 10267 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10268 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10269 match(Set dst (MulF src1 src2)); 10270 10271 format %{ "FLD $src1\n\t" 10272 "FMUL $src2\n\t" 10273 "FSTP_S $dst" %} 10274 opcode(0xD8, 0x1); /* D8 C8+i */ 10275 ins_encode( Push_Reg_FPR(src2), 10276 OpcReg_FPR(src1), 10277 Pop_Reg_FPR(dst) ); 10278 ins_pipe( fpu_reg_reg_reg ); 10279 %} 10280 10281 10282 // Spill to obtain 24-bit precision 10283 // Cisc-alternate to reg-reg multiply 10284 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10285 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10286 match(Set dst (MulF src1 (LoadF src2))); 10287 10288 format %{ "FLD_S $src2\n\t" 10289 "FMUL $src1\n\t" 10290 "FSTP_S $dst" %} 10291 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10292 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10293 OpcReg_FPR(src1), 10294 Pop_Mem_FPR(dst) ); 10295 ins_pipe( fpu_mem_reg_mem ); 10296 %} 10297 // 10298 // This instruction does not round to 24-bits 10299 // Cisc-alternate to reg-reg multiply 10300 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10301 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10302 match(Set dst (MulF src1 (LoadF src2))); 10303 10304 format %{ "FMUL $dst,$src1,$src2" %} 10305 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10306 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10307 OpcReg_FPR(src1), 10308 Pop_Reg_FPR(dst) ); 10309 ins_pipe( fpu_reg_reg_mem ); 10310 %} 10311 10312 // Spill to obtain 24-bit precision 10313 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10314 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10315 match(Set dst (MulF src1 src2)); 10316 10317 format %{ "FMUL 
$dst,$src1,$src2" %} 10318 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10319 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10320 set_instruction_start, 10321 OpcP, RMopc_Mem(secondary,src1), 10322 Pop_Mem_FPR(dst) ); 10323 ins_pipe( fpu_mem_mem_mem ); 10324 %} 10325 10326 // Spill to obtain 24-bit precision 10327 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10328 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10329 match(Set dst (MulF src con)); 10330 10331 format %{ "FLD $src\n\t" 10332 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10333 "FSTP_S $dst" %} 10334 ins_encode %{ 10335 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10336 __ fmul_s($constantaddress($con)); 10337 __ fstp_s(Address(rsp, $dst$$disp)); 10338 %} 10339 ins_pipe(fpu_mem_reg_con); 10340 %} 10341 // 10342 // This instruction does not round to 24-bits 10343 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10344 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10345 match(Set dst (MulF src con)); 10346 10347 format %{ "FLD $src\n\t" 10348 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10349 "FSTP $dst" %} 10350 ins_encode %{ 10351 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10352 __ fmul_s($constantaddress($con)); 10353 __ fstp_d($dst$$reg); 10354 %} 10355 ins_pipe(fpu_reg_reg_con); 10356 %} 10357 10358 10359 // 10360 // MACRO1 -- subsume unshared load into mulFPR 10361 // This instruction does not round to 24-bits 10362 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10363 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10364 match(Set dst (MulF (LoadF mem1) src)); 10365 10366 format %{ "FLD $mem1 ===MACRO1===\n\t" 10367 "FMUL ST,$src\n\t" 10368 "FSTP $dst" %} 10369 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10370 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10371 OpcReg_FPR(src), 10372 
Pop_Reg_FPR(dst) ); 10373 ins_pipe( fpu_reg_reg_mem ); 10374 %} 10375 // 10376 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10377 // This instruction does not round to 24-bits 10378 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10379 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10380 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10381 ins_cost(95); 10382 10383 format %{ "FLD $mem1 ===MACRO2===\n\t" 10384 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10385 "FADD ST,$src2\n\t" 10386 "FSTP $dst" %} 10387 opcode(0xD9); /* LoadF D9 /0 */ 10388 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10389 FMul_ST_reg(src1), 10390 FAdd_ST_reg(src2), 10391 Pop_Reg_FPR(dst) ); 10392 ins_pipe( fpu_reg_mem_reg_reg ); 10393 %} 10394 10395 // MACRO3 -- addFPR a mulFPR 10396 // This instruction does not round to 24-bits. It is a '2-address' 10397 // instruction in that the result goes back to src2. This eliminates 10398 // a move from the macro; possibly the register allocator will have 10399 // to add it back (and maybe not). 
// MACRO3: x87 fused multiply-add. Note the 2-address match rule -- the sum is
// accumulated back into src2 (FADDP pops into $src2), so no result move is needed.
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
// Popping the quotient to a float stack slot (Pop_Mem_FPR into stackSlotF dst)
// is what forces the round to single precision.
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// 2-address form: dst is both divided and overwritten (match rule DivF dst src).
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));  // pop to a float stack slot => rounds to 24-bit
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder. There is no SSE remainder instruction, so the operands
// are spilled through the stack onto the x87 FPU and FPREM is run in a loop
// (FNSTSW/SAHF/JP) until the partial-remainder reduction is complete.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all Alpha sorted. Please keep it that way!

// Round an x87 value to float precision by storing it to a stack slot.
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round an x87 value to double precision by storing it to a stack slot.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);  // the single-precision store does the rounding
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is already FPR1 (top of x87 stack) store directly; otherwise
    // load ST(i) to the top first and use the popping store.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Widen float to double on the x87 stack (no rounding can occur).
// NOTE(review): format shows FST_S for a F->D widen -- looks like a cosmetic
// copy-paste in the format string only; confirm against the emitted encoding.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// SSE float -> x87 double: bounce the XMM value through the stack.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// x87 double->int. FIST (in truncating rounding mode) leaves the sentinel
// 0x80000000 on overflow/NaN; that routes to the d2i_wrapper slow path,
// which produces the Java-mandated corner-case results.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    // CVTTSD2SI yields 0x80000000 on overflow/NaN; take the slow path then.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Reload the source onto the x87 stack for the wrapper stub.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 double->long; same sentinel scheme as above, testing the 64-bit
// sentinel 0x8000000000000000 (hi == 0x80000000 && lo == 0).
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    // Truncating rounding mode for the Java-semantics FIST.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 sentinel => overflow/NaN, go through d2l_wrapper.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
// SSE float->int: CVTTSS2SI truncates directly; the 0x80000000 sentinel
// (overflow/NaN) diverts to the d2i_wrapper stub via the x87 stack.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    // Sentinel result => possible overflow/NaN, take the slow path.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Reload the float on the x87 stack for the wrapper stub.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 float->long; 64-bit sentinel check (EDX:EAX == 0x80000000:0)
// selects the d2l_wrapper slow path.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // Move the XMM float onto the x87 stack (no SSE float->long exists).
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // Truncating rounding mode for the Java-semantics FIST.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 sentinel => overflow/NaN, go through d2l_wrapper.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// int -> x87 double via FILD from a stack slot.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Cisc variant: fold the integer load into the conversion.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Alternative i2d selected by the UseXmmI2D flag: move through the XMM
// integer domain and convert with CVTDQ2PD instead of CVTSI2SD.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Cisc variant of convI2DPR_reg: FILD straight from memory.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
10932 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ 10933 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); 10934 match(Set dst (ConvI2F src)); 10935 format %{ "FILD $src\n\t" 10936 "FSTP $dst" %} 10937 10938 opcode(0xDB, 0x0); /* DB /0 */ 10939 ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); 10940 ins_pipe( fpu_reg_mem ); 10941 %} 10942 10943 // In 24-bit mode, force exponent rounding by storing back out 10944 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ 10945 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10946 match(Set dst (ConvI2F src)); 10947 ins_cost(200); 10948 format %{ "FILD $src\n\t" 10949 "FSTP_S $dst" %} 10950 opcode(0xDB, 0x0); /* DB /0 */ 10951 ins_encode( Push_Mem_I(src), 10952 Pop_Mem_FPR(dst)); 10953 ins_pipe( fpu_mem_mem ); 10954 %} 10955 10956 // In 24-bit mode, force exponent rounding by storing back out 10957 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ 10958 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10959 match(Set dst (ConvI2F (LoadI mem))); 10960 ins_cost(200); 10961 format %{ "FILD $mem\n\t" 10962 "FSTP_S $dst" %} 10963 opcode(0xDB); /* DB /0 */ 10964 ins_encode( OpcP, RMopc_Mem(0x00,mem), 10965 Pop_Mem_FPR(dst)); 10966 ins_pipe( fpu_mem_mem ); 10967 %} 10968 10969 // This instruction does not round to 24-bits 10970 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ 10971 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10972 match(Set dst (ConvI2F src)); 10973 format %{ "FILD $src\n\t" 10974 "FSTP $dst" %} 10975 opcode(0xDB, 0x0); /* DB /0 */ 10976 ins_encode( Push_Mem_I(src), 10977 Pop_Reg_FPR(dst)); 10978 ins_pipe( fpu_reg_mem ); 10979 %} 10980 10981 // This instruction does not round to 24-bits 10982 instruct convI2FPR_mem(regFPR dst, memory mem) %{ 10983 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10984 match(Set dst (ConvI2F (LoadI 
mem))); 10985 format %{ "FILD $mem\n\t" 10986 "FSTP $dst" %} 10987 opcode(0xDB); /* DB /0 */ 10988 ins_encode( OpcP, RMopc_Mem(0x00,mem), 10989 Pop_Reg_FPR(dst)); 10990 ins_pipe( fpu_reg_mem ); 10991 %} 10992 10993 // Convert an int to a float in xmm; no rounding step needed. 10994 instruct convI2F_reg(regF dst, rRegI src) %{ 10995 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); 10996 match(Set dst (ConvI2F src)); 10997 format %{ "CVTSI2SS $dst, $src" %} 10998 ins_encode %{ 10999 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); 11000 %} 11001 ins_pipe( pipe_slow ); 11002 %} 11003 11004 instruct convXI2F_reg(regF dst, rRegI src) 11005 %{ 11006 predicate( UseSSE>=2 && UseXmmI2F ); 11007 match(Set dst (ConvI2F src)); 11008 11009 format %{ "MOVD $dst,$src\n\t" 11010 "CVTDQ2PS $dst,$dst\t# i2f" %} 11011 ins_encode %{ 11012 __ movdl($dst$$XMMRegister, $src$$Register); 11013 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); 11014 %} 11015 ins_pipe(pipe_slow); // XXX 11016 %} 11017 11018 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{ 11019 match(Set dst (ConvI2L src)); 11020 effect(KILL cr); 11021 ins_cost(375); 11022 format %{ "MOV $dst.lo,$src\n\t" 11023 "MOV $dst.hi,$src\n\t" 11024 "SAR $dst.hi,31" %} 11025 ins_encode(convert_int_long(dst,src)); 11026 ins_pipe( ialu_reg_reg_long ); 11027 %} 11028 11029 // Zero-extend convert int to long 11030 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{ 11031 match(Set dst (AndL (ConvI2L src) mask) ); 11032 effect( KILL flags ); 11033 ins_cost(250); 11034 format %{ "MOV $dst.lo,$src\n\t" 11035 "XOR $dst.hi,$dst.hi" %} 11036 opcode(0x33); // XOR 11037 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11038 ins_pipe( ialu_reg_reg_long ); 11039 %} 11040 11041 // Zero-extend long 11042 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{ 11043 match(Set dst (AndL src mask) ); 11044 effect( KILL flags ); 11045 ins_cost(250); 11046 format %{ "MOV 
$dst.lo,$src.lo\n\t" 11047 "XOR $dst.hi,$dst.hi\n\t" %} 11048 opcode(0x33); // XOR 11049 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11050 ins_pipe( ialu_reg_reg_long ); 11051 %} 11052 11053 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ 11054 predicate (UseSSE<=1); 11055 match(Set dst (ConvL2D src)); 11056 effect( KILL cr ); 11057 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11058 "PUSH $src.lo\n\t" 11059 "FILD ST,[ESP + #0]\n\t" 11060 "ADD ESP,8\n\t" 11061 "FSTP_D $dst\t# D-round" %} 11062 opcode(0xDF, 0x5); /* DF /5 */ 11063 ins_encode(convert_long_double(src), Pop_Mem_DPR(dst)); 11064 ins_pipe( pipe_slow ); 11065 %} 11066 11067 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{ 11068 predicate (UseSSE>=2); 11069 match(Set dst (ConvL2D src)); 11070 effect( KILL cr ); 11071 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11072 "PUSH $src.lo\n\t" 11073 "FILD_D [ESP]\n\t" 11074 "FSTP_D [ESP]\n\t" 11075 "MOVSD $dst,[ESP]\n\t" 11076 "ADD ESP,8" %} 11077 opcode(0xDF, 0x5); /* DF /5 */ 11078 ins_encode(convert_long_double2(src), Push_ResultD(dst)); 11079 ins_pipe( pipe_slow ); 11080 %} 11081 11082 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{ 11083 predicate (UseSSE>=1); 11084 match(Set dst (ConvL2F src)); 11085 effect( KILL cr ); 11086 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11087 "PUSH $src.lo\n\t" 11088 "FILD_D [ESP]\n\t" 11089 "FSTP_S [ESP]\n\t" 11090 "MOVSS $dst,[ESP]\n\t" 11091 "ADD ESP,8" %} 11092 opcode(0xDF, 0x5); /* DF /5 */ 11093 ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8)); 11094 ins_pipe( pipe_slow ); 11095 %} 11096 11097 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ 11098 match(Set dst (ConvL2F src)); 11099 effect( KILL cr ); 11100 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11101 "PUSH $src.lo\n\t" 11102 "FILD ST,[ESP + #0]\n\t" 11103 "ADD ESP,8\n\t" 11104 "FSTP_S $dst\t# F-round" %} 11105 opcode(0xDF, 
              0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long -> int conversion: simply copy the low 32-bit half of the long pair.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Raw bit moves between int<->float and long<->double representations.
// Variants below are selected by the UseSSE level: x87 (regFPR), SSE1
// (MOVSS), SSE2 (MOVD/MOVSD); stack-slot forms go through memory.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Double bits -> long pair: two 32-bit loads from the stack slot.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Register-to-register form: low dword via MOVD, high dword extracted by
// shuffling the upper half of the XMM register down first (needs a temp).
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Long pair -> double bits: move both halves into XMM regs and interleave.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!UseFastStosb);
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
  format %{ "XOR EAX,EAX\t# ClearArray:\n\t"
            "SHL ECX,1\t# Convert doublewords to words\n\t"
            "REP STOS\t# store EAX into [EDI++] while ECX--" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(UseFastStosb);
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
  format %{ "XOR EAX,EAX\t# ClearArray:\n\t"
            "SHL ECX,3\t# Convert doublewords to bytes\n\t"
            "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// String compare intrinsics.  The LL/UU/LU/UL suffix encodes the
// byte[]/char[] encodings of the two operands (StrIntrinsicNode).
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}

// UL case: note str1/str2 (and cnt1/cnt2) are deliberately swapped in the
// call so the stub always sees the Latin1 operand in the same position.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}

  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// IndexOf intrinsic for a UTF-16 needle of constant length (SSE4.2 only).
// The C8 variant avoids the stack-staging path for needles of >= 8 chars.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// IndexOf intrinsic, UTF-16 haystack / Latin1 needle, constant needle length.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf intrinsics with a runtime (non-constant) needle length: the
// (-1) argument tells the stub the substring size is not known statically.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Single-character IndexOf in a char[] (SSE4.2).
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// Test whether a byte[] contains any byte with the sign bit set.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST reg,reg (shorter than CMP with immediate).
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 appears unused below — verify before removing.
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Unordered-compare branch: the parity flag (set on NaN) must be folded
// into the equal/not-equal decision, hence the extra JP and local label.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
// array for an instance of the superklass.  Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()).  Return
// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12152 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{ 12153 match(Set result (PartialSubtypeCheck sub super)); 12154 effect( KILL rcx, KILL cr ); 12155 12156 ins_cost(1100); // slightly larger than the next version 12157 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12158 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12159 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12160 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12161 "JNE,s miss\t\t# Missed: EDI not-zero\n\t" 12162 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" 12163 "XOR $result,$result\t\t Hit: EDI zero\n\t" 12164 "miss:\t" %} 12165 12166 opcode(0x1); // Force a XOR of EDI 12167 ins_encode( enc_PartialSubtypeCheck() ); 12168 ins_pipe( pipe_slow ); 12169 %} 12170 12171 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{ 12172 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 12173 effect( KILL rcx, KILL result ); 12174 12175 ins_cost(1000); 12176 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12177 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12178 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12179 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12180 "JNE,s miss\t\t# Missed: flags NZ\n\t" 12181 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" 12182 "miss:\t" %} 12183 12184 opcode(0x0); // No need to XOR EDI 12185 ins_encode( enc_PartialSubtypeCheck() ); 12186 ins_pipe( pipe_slow ); 12187 %} 12188 12189 // ============================================================================ 12190 // Branch Instructions -- short offset versions 12191 // 12192 // These instructions are used to replace jumps of a long offset (the default 12193 
// match) with jumps of a shorter offset. These instructions are all tagged 12194 // with the ins_short_branch attribute, which causes the ADLC to suppress the 12195 // match rules in general matching. Instead, the ADLC generates a conversion 12196 // method in the MachNode which can be used to do in-place replacement of the 12197 // long variant with the shorter variant. The compiler will determine if a 12198 // branch can be taken by the is_short_branch_offset() predicate in the machine 12199 // specific code section of the file. 12200 12201 // Jump Direct - Label defines a relative address from JMP+1 12202 instruct jmpDir_short(label labl) %{ 12203 match(Goto); 12204 effect(USE labl); 12205 12206 ins_cost(300); 12207 format %{ "JMP,s $labl" %} 12208 size(2); 12209 ins_encode %{ 12210 Label* L = $labl$$label; 12211 __ jmpb(*L); 12212 %} 12213 ins_pipe( pipe_jmp ); 12214 ins_short_branch(1); 12215 %} 12216 12217 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12218 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{ 12219 match(If cop cr); 12220 effect(USE labl); 12221 12222 ins_cost(300); 12223 format %{ "J$cop,s $labl" %} 12224 size(2); 12225 ins_encode %{ 12226 Label* L = $labl$$label; 12227 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12228 %} 12229 ins_pipe( pipe_jcc ); 12230 ins_short_branch(1); 12231 %} 12232 12233 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12234 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{ 12235 match(CountedLoopEnd cop cr); 12236 effect(USE labl); 12237 12238 ins_cost(300); 12239 format %{ "J$cop,s $labl\t# Loop end" %} 12240 size(2); 12241 ins_encode %{ 12242 Label* L = $labl$$label; 12243 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12244 %} 12245 ins_pipe( pipe_jcc ); 12246 ins_short_branch(1); 12247 %} 12248 12249 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12250 instruct jmpLoopEndU_short(cmpOpU cop, 
eFlagsRegU cmp, label labl) %{ 12251 match(CountedLoopEnd cop cmp); 12252 effect(USE labl); 12253 12254 ins_cost(300); 12255 format %{ "J$cop,us $labl\t# Loop end" %} 12256 size(2); 12257 ins_encode %{ 12258 Label* L = $labl$$label; 12259 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12260 %} 12261 ins_pipe( pipe_jcc ); 12262 ins_short_branch(1); 12263 %} 12264 12265 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12266 match(CountedLoopEnd cop cmp); 12267 effect(USE labl); 12268 12269 ins_cost(300); 12270 format %{ "J$cop,us $labl\t# Loop end" %} 12271 size(2); 12272 ins_encode %{ 12273 Label* L = $labl$$label; 12274 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12275 %} 12276 ins_pipe( pipe_jcc ); 12277 ins_short_branch(1); 12278 %} 12279 12280 // Jump Direct Conditional - using unsigned comparison 12281 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12282 match(If cop cmp); 12283 effect(USE labl); 12284 12285 ins_cost(300); 12286 format %{ "J$cop,us $labl" %} 12287 size(2); 12288 ins_encode %{ 12289 Label* L = $labl$$label; 12290 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12291 %} 12292 ins_pipe( pipe_jcc ); 12293 ins_short_branch(1); 12294 %} 12295 12296 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12297 match(If cop cmp); 12298 effect(USE labl); 12299 12300 ins_cost(300); 12301 format %{ "J$cop,us $labl" %} 12302 size(2); 12303 ins_encode %{ 12304 Label* L = $labl$$label; 12305 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12306 %} 12307 ins_pipe( pipe_jcc ); 12308 ins_short_branch(1); 12309 %} 12310 12311 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12312 match(If cop cmp); 12313 effect(USE labl); 12314 12315 ins_cost(300); 12316 format %{ $$template 12317 if ($cop$$cmpcode == Assembler::notEqual) { 12318 $$emit$$"JP,u,s $labl\n\t" 12319 $$emit$$"J$cop,u,s $labl" 12320 } else { 12321 $$emit$$"JP,u,s done\n\t" 12322 
$$emit$$"J$cop,u,s $labl\n\t" 12323 $$emit$$"done:" 12324 } 12325 %} 12326 size(4); 12327 ins_encode %{ 12328 Label* l = $labl$$label; 12329 if ($cop$$cmpcode == Assembler::notEqual) { 12330 __ jccb(Assembler::parity, *l); 12331 __ jccb(Assembler::notEqual, *l); 12332 } else if ($cop$$cmpcode == Assembler::equal) { 12333 Label done; 12334 __ jccb(Assembler::parity, done); 12335 __ jccb(Assembler::equal, *l); 12336 __ bind(done); 12337 } else { 12338 ShouldNotReachHere(); 12339 } 12340 %} 12341 ins_pipe(pipe_jcc); 12342 ins_short_branch(1); 12343 %} 12344 12345 // ============================================================================ 12346 // Long Compare 12347 // 12348 // Currently we hold longs in 2 registers. Comparing such values efficiently 12349 // is tricky. The flavor of compare used depends on whether we are testing 12350 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit. 12351 // The GE test is the negated LT test. The LE test can be had by commuting 12352 // the operands (yielding a GE test) and then negating; negate again for the 12353 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the 12354 // NE test is negated from that. 12355 12356 // Due to a shortcoming in the ADLC, it mixes up expressions like: 12357 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the 12358 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections 12359 // are collapsed internally in the ADLC's dfa-gen code. The match for 12360 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the 12361 // foo match ends up with the wrong leaf. One fix is to not match both 12362 // reg-reg and reg-zero forms of long-compare. This is unfortunate because 12363 // both forms beat the trinary form of long-compare and both are very useful 12364 // on Intel which has so few registers. 12365 12366 // Manifest a CmpL result in an integer register. Very painful. 
12367 // This is the test to avoid. 12368 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{ 12369 match(Set dst (CmpL3 src1 src2)); 12370 effect( KILL flags ); 12371 ins_cost(1000); 12372 format %{ "XOR $dst,$dst\n\t" 12373 "CMP $src1.hi,$src2.hi\n\t" 12374 "JLT,s m_one\n\t" 12375 "JGT,s p_one\n\t" 12376 "CMP $src1.lo,$src2.lo\n\t" 12377 "JB,s m_one\n\t" 12378 "JEQ,s done\n" 12379 "p_one:\tINC $dst\n\t" 12380 "JMP,s done\n" 12381 "m_one:\tDEC $dst\n" 12382 "done:" %} 12383 ins_encode %{ 12384 Label p_one, m_one, done; 12385 __ xorptr($dst$$Register, $dst$$Register); 12386 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register)); 12387 __ jccb(Assembler::less, m_one); 12388 __ jccb(Assembler::greater, p_one); 12389 __ cmpl($src1$$Register, $src2$$Register); 12390 __ jccb(Assembler::below, m_one); 12391 __ jccb(Assembler::equal, done); 12392 __ bind(p_one); 12393 __ incrementl($dst$$Register); 12394 __ jmpb(done); 12395 __ bind(m_one); 12396 __ decrementl($dst$$Register); 12397 __ bind(done); 12398 %} 12399 ins_pipe( pipe_slow ); 12400 %} 12401 12402 //====== 12403 // Manifest a CmpL result in the normal flags. Only good for LT or GE 12404 // compares. Can be used for LE or GT compares by reversing arguments. 12405 // NOT GOOD FOR EQ/NE tests. 12406 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ 12407 match( Set flags (CmpL src zero )); 12408 ins_cost(100); 12409 format %{ "TEST $src.hi,$src.hi" %} 12410 opcode(0x85); 12411 ins_encode( OpcP, RegReg_Hi2( src, src ) ); 12412 ins_pipe( ialu_cr_reg_reg ); 12413 %} 12414 12415 // Manifest a CmpL result in the normal flags. Only good for LT or GE 12416 // compares. Can be used for LE or GT compares by reversing arguments. 12417 // NOT GOOD FOR EQ/NE tests. 
12418 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{ 12419 match( Set flags (CmpL src1 src2 )); 12420 effect( TEMP tmp ); 12421 ins_cost(300); 12422 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" 12423 "MOV $tmp,$src1.hi\n\t" 12424 "SBB $tmp,$src2.hi\t! Compute flags for long compare" %} 12425 ins_encode( long_cmp_flags2( src1, src2, tmp ) ); 12426 ins_pipe( ialu_cr_reg_reg ); 12427 %} 12428 12429 // Long compares reg < zero/req OR reg >= zero/req. 12430 // Just a wrapper for a normal branch, plus the predicate test. 12431 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{ 12432 match(If cmp flags); 12433 effect(USE labl); 12434 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12435 expand %{ 12436 jmpCon(cmp,flags,labl); // JLT or JGE... 12437 %} 12438 %} 12439 12440 // Compare 2 longs and CMOVE longs. 12441 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{ 12442 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 12443 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12444 ins_cost(400); 12445 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12446 "CMOV$cmp $dst.hi,$src.hi" %} 12447 opcode(0x0F,0x40); 12448 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 12449 ins_pipe( pipe_cmov_reg_long ); 12450 %} 12451 12452 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{ 12453 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 12454 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12455 
ins_cost(500); 12456 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12457 "CMOV$cmp $dst.hi,$src.hi" %} 12458 opcode(0x0F,0x40); 12459 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 12460 ins_pipe( pipe_cmov_reg_long ); 12461 %} 12462 12463 // Compare 2 longs and CMOVE ints. 12464 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{ 12465 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12466 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 12467 ins_cost(200); 12468 format %{ "CMOV$cmp $dst,$src" %} 12469 opcode(0x0F,0x40); 12470 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12471 ins_pipe( pipe_cmov_reg ); 12472 %} 12473 12474 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{ 12475 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12476 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 12477 ins_cost(250); 12478 format %{ "CMOV$cmp $dst,$src" %} 12479 opcode(0x0F,0x40); 12480 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 12481 ins_pipe( pipe_cmov_mem ); 12482 %} 12483 12484 // Compare 2 longs and CMOVE ints. 
12485 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{ 12486 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 12487 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); 12488 ins_cost(200); 12489 format %{ "CMOV$cmp $dst,$src" %} 12490 opcode(0x0F,0x40); 12491 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12492 ins_pipe( pipe_cmov_reg ); 12493 %} 12494 12495 // Compare 2 longs and CMOVE doubles 12496 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{ 12497 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12498 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12499 ins_cost(200); 12500 expand %{ 12501 fcmovDPR_regS(cmp,flags,dst,src); 12502 %} 12503 %} 12504 12505 // Compare 2 longs and CMOVE doubles 12506 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ 12507 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12508 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12509 ins_cost(200); 12510 expand %{ 12511 fcmovD_regS(cmp,flags,dst,src); 12512 %} 12513 %} 12514 12515 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{ 12516 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12517 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12518 ins_cost(200); 12519 expand %{ 12520 fcmovFPR_regS(cmp,flags,dst,src); 12521 %} 12522 %} 12523 12524 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF 
src) %{ 12525 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12526 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12527 ins_cost(200); 12528 expand %{ 12529 fcmovF_regS(cmp,flags,dst,src); 12530 %} 12531 %} 12532 12533 //====== 12534 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. 12535 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{ 12536 match( Set flags (CmpL src zero )); 12537 effect(TEMP tmp); 12538 ins_cost(200); 12539 format %{ "MOV $tmp,$src.lo\n\t" 12540 "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %} 12541 ins_encode( long_cmp_flags0( src, tmp ) ); 12542 ins_pipe( ialu_reg_reg_long ); 12543 %} 12544 12545 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. 12546 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{ 12547 match( Set flags (CmpL src1 src2 )); 12548 ins_cost(200+300); 12549 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" 12550 "JNE,s skip\n\t" 12551 "CMP $src1.hi,$src2.hi\n\t" 12552 "skip:\t" %} 12553 ins_encode( long_cmp_flags1( src1, src2 ) ); 12554 ins_pipe( ialu_cr_reg_reg ); 12555 %} 12556 12557 // Long compare reg == zero/reg OR reg != zero/reg 12558 // Just a wrapper for a normal branch, plus the predicate test. 12559 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{ 12560 match(If cmp flags); 12561 effect(USE labl); 12562 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 12563 expand %{ 12564 jmpCon(cmp,flags,labl); // JEQ or JNE... 12565 %} 12566 %} 12567 12568 // Compare 2 longs and CMOVE longs. 
12569 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{ 12570 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 12571 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12572 ins_cost(400); 12573 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12574 "CMOV$cmp $dst.hi,$src.hi" %} 12575 opcode(0x0F,0x40); 12576 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 12577 ins_pipe( pipe_cmov_reg_long ); 12578 %} 12579 12580 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{ 12581 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 12582 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12583 ins_cost(500); 12584 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12585 "CMOV$cmp $dst.hi,$src.hi" %} 12586 opcode(0x0F,0x40); 12587 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 12588 ins_pipe( pipe_cmov_reg_long ); 12589 %} 12590 12591 // Compare 2 longs and CMOVE ints. 
12592 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{ 12593 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12594 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 12595 ins_cost(200); 12596 format %{ "CMOV$cmp $dst,$src" %} 12597 opcode(0x0F,0x40); 12598 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12599 ins_pipe( pipe_cmov_reg ); 12600 %} 12601 12602 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{ 12603 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12604 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 12605 ins_cost(250); 12606 format %{ "CMOV$cmp $dst,$src" %} 12607 opcode(0x0F,0x40); 12608 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 12609 ins_pipe( pipe_cmov_mem ); 12610 %} 12611 12612 // Compare 2 longs and CMOVE ints. 
12613 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{ 12614 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 12615 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); 12616 ins_cost(200); 12617 format %{ "CMOV$cmp $dst,$src" %} 12618 opcode(0x0F,0x40); 12619 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12620 ins_pipe( pipe_cmov_reg ); 12621 %} 12622 12623 // Compare 2 longs and CMOVE doubles 12624 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{ 12625 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 12626 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12627 ins_cost(200); 12628 expand %{ 12629 fcmovDPR_regS(cmp,flags,dst,src); 12630 %} 12631 %} 12632 12633 // Compare 2 longs and CMOVE doubles 12634 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ 12635 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 12636 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12637 ins_cost(200); 12638 expand %{ 12639 fcmovD_regS(cmp,flags,dst,src); 12640 %} 12641 %} 12642 12643 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{ 12644 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 12645 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12646 ins_cost(200); 12647 expand %{ 12648 fcmovFPR_regS(cmp,flags,dst,src); 12649 %} 12650 %} 12651 12652 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF 
src) %{ 12653 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 12654 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12655 ins_cost(200); 12656 expand %{ 12657 fcmovF_regS(cmp,flags,dst,src); 12658 %} 12659 %} 12660 12661 //====== 12662 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. 12663 // Same as cmpL_reg_flags_LEGT except must negate src 12664 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{ 12665 match( Set flags (CmpL src zero )); 12666 effect( TEMP tmp ); 12667 ins_cost(300); 12668 format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t" 12669 "CMP $tmp,$src.lo\n\t" 12670 "SBB $tmp,$src.hi\n\t" %} 12671 ins_encode( long_cmp_flags3(src, tmp) ); 12672 ins_pipe( ialu_reg_reg_long ); 12673 %} 12674 12675 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. 12676 // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands 12677 // requires a commuted test to get the same result. 12678 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{ 12679 match( Set flags (CmpL src1 src2 )); 12680 effect( TEMP tmp ); 12681 ins_cost(300); 12682 format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t" 12683 "MOV $tmp,$src2.hi\n\t" 12684 "SBB $tmp,$src1.hi\t! Compute flags for long compare" %} 12685 ins_encode( long_cmp_flags2( src2, src1, tmp ) ); 12686 ins_pipe( ialu_cr_reg_reg ); 12687 %} 12688 12689 // Long compares reg < zero/req OR reg >= zero/req. 
12690 // Just a wrapper for a normal branch, plus the predicate test 12691 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{ 12692 match(If cmp flags); 12693 effect(USE labl); 12694 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); 12695 ins_cost(300); 12696 expand %{ 12697 jmpCon(cmp,flags,labl); // JGT or JLE... 12698 %} 12699 %} 12700 12701 // Compare 2 longs and CMOVE longs. 12702 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{ 12703 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 12704 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12705 ins_cost(400); 12706 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12707 "CMOV$cmp $dst.hi,$src.hi" %} 12708 opcode(0x0F,0x40); 12709 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 12710 ins_pipe( pipe_cmov_reg_long ); 12711 %} 12712 12713 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{ 12714 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 12715 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12716 ins_cost(500); 12717 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12718 "CMOV$cmp $dst.hi,$src.hi+4" %} 12719 opcode(0x0F,0x40); 12720 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 12721 ins_pipe( pipe_cmov_reg_long ); 12722 %} 12723 12724 // Compare 2 longs and CMOVE ints. 
12725 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{ 12726 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12727 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 12728 ins_cost(200); 12729 format %{ "CMOV$cmp $dst,$src" %} 12730 opcode(0x0F,0x40); 12731 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12732 ins_pipe( pipe_cmov_reg ); 12733 %} 12734 12735 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{ 12736 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12737 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 12738 ins_cost(250); 12739 format %{ "CMOV$cmp $dst,$src" %} 12740 opcode(0x0F,0x40); 12741 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 12742 ins_pipe( pipe_cmov_mem ); 12743 %} 12744 12745 // Compare 2 longs and CMOVE ptrs. 
// Compare 2 longs and CMOVE ptrs (commuted le/gt tests, see cmovII_reg_LEGT).
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);  // CMOVcc reg,reg
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 form, UseSSE<=1).
// NOTE(review): the le/gt test is now parenthesized.  Without the parentheses
// '&&' binds tighter than '||', so the predicate parsed as
// (UseSSE<=1 && test==le) || test==gt, letting the UseSSE guard be bypassed
// for the gt case — inconsistent with the parenthesized integer/pointer LEGT
// rules above.  The fix only narrows matching to the clearly intended set.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form, UseSSE>=2).
// Parenthesization fixed as above.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, UseSSE==0).
// Parenthesization fixed as above.
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM form, UseSSE>=1).
// Parenthesization fixed as above.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // EAX is pre-loaded with a placeholder inline-cache oop; patched later.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Leaf calls still clear the x87 stack and verify FPU state afterwards.
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that is known not to touch floating point: no FPU bookkeeping.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // Return address is discarded by popping it into EDX before the jump.
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast-lock with RTM (restricted transactional memory) support; selected
// only when the compile uses RTM.  Kills box and all temp registers.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast-lock; the RTM-specific arguments are passed as null/false.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Live rule: a load that immediately follows a store of the same register to
// the same memory slot is redundant; replace the pair with just the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.