//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
// Each 80-bit x87 stack slot is described as an L (low 32 bits) / H (high
// 32 bits) pair so that doubles occupy two adjacent mask slots.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
// Relocation formats used when emitting 32-bit immediates/displacements.
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

// Conventional HotSpot shorthand for the local MacroAssembler.
#define __ _masm.
// How to find the high register of a Long pair, given the low register.
// The +2 matches the register-mask layout above, where each pair's halves
// sit two slots apart (e.g. the FPR#L/FPR#H pairs) — see alloc_class chunk0.
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Writes the (lo, hi) pair to the first 16-byte-aligned address at or below
// 'adr' and returns that aligned address; callers reserve extra slack space
// in the pool so the rounding-down never escapes the buffer.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted before a call for per-call state resets:
// an optional FLDCW (24-bit FP mode) and an optional VZEROUPPER (wide vectors).
// The byte counts here must match the instructions actually emitted.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size of the FFree-Float-Stack-All stub; recorded when it is emitted,
// -1 until then.
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M (or SIB) byte from its three bit fields:
// f1 = mod (2 bits), f2 = reg/opcode (3 bits), f3 = r/m (3 bits).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a base opcode with a condition code.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
// 'offset' adjusts the relocated address relative to the instruction mark.
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  // Embedded oops must not be scavengable unless ScavengeRootsInCode allows it.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + [ESP+disp] addressing, choosing the 8-bit displacement
// form when disp fits in a signed byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);                      // Displacement
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);                      // Displacement
  }
}

// Emit the ModR/M (+ optional SIB) + displacement portion of a reg,mem
// instruction. index == 0x4 means "no index"; base == -1 flags an absolute
// 32-bit address. disp_reloc, when not relocInfo::none, forces the 32-bit
// displacement form and attaches a relocation to it.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Emit a register-to-register MOV (0x8B), or nothing when source and
// destination encodings are identical.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  // 7 6 5 4 3 2 1 0
  // |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  // 0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize the three-valued FP compare result in 'dst':
// -1 (less or unordered), 0 (equal), 1 (greater).
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // popl EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85); // test eax, [polling page]
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

// Upper bound on the epilog size in bytes; the per-feature byte counts
// must stay in sync with MachEpilogNode::emit above.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  size += 64; // added to support ReservedStackAccess
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Register classes used by the spill-copy code below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
// Classify an allocator register as integer, x87 float, XMM or stack slot.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (or format, or size) a load/store between a register and [ESP+offset].
// With a CodeBuffer it emits; with do_size false it prints; in all cases it
// returns the accumulated instruction size in bytes.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
// Emit/format/size an XMM <-> stack spill move. A reg_lo/reg_hi adjacent
// pair means a 64-bit (double) move; otherwise a 32-bit (float) move.
// Returns the accumulated instruction size in bytes.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting: EVEX encodings may compress the displacement byte.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// Emit/format/size an XMM register-to-register move (float or double).
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) { // Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ?
6 : 4; 876 if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) && 877 UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3; 878 return size + sz; 879 } 880 881 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 882 int src_hi, int dst_hi, int size, outputStream* st ) { 883 // 32-bit 884 if (cbuf) { 885 MacroAssembler _masm(cbuf); 886 __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]), 887 as_Register(Matcher::_regEncode[src_lo])); 888 #ifndef PRODUCT 889 } else if (!do_size) { 890 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); 891 #endif 892 } 893 return (UseAVX> 2) ? 6 : 4; 894 } 895 896 897 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 898 int src_hi, int dst_hi, int size, outputStream* st ) { 899 // 32-bit 900 if (cbuf) { 901 MacroAssembler _masm(cbuf); 902 __ movdl(as_Register(Matcher::_regEncode[dst_lo]), 903 as_XMMRegister(Matcher::_regEncode[src_lo])); 904 #ifndef PRODUCT 905 } else if (!do_size) { 906 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); 907 #endif 908 } 909 return (UseAVX> 2) ? 
6 : 4; 910 } 911 912 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) { 913 if( cbuf ) { 914 emit_opcode(*cbuf, 0x8B ); 915 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] ); 916 #ifndef PRODUCT 917 } else if( !do_size ) { 918 if( size != 0 ) st->print("\n\t"); 919 st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]); 920 #endif 921 } 922 return size+2; 923 } 924 925 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi, 926 int offset, int size, outputStream* st ) { 927 if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there 928 if( cbuf ) { 929 emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it) 930 emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] ); 931 #ifndef PRODUCT 932 } else if( !do_size ) { 933 if( size != 0 ) st->print("\n\t"); 934 st->print("FLD %s",Matcher::regName[src_lo]); 935 #endif 936 } 937 size += 2; 938 } 939 940 int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/; 941 const char *op_str; 942 int op; 943 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store? 944 op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D "; 945 op = 0xDD; 946 } else { // 32-bit store 947 op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S "; 948 op = 0xD9; 949 assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" ); 950 } 951 952 return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st); 953 } 954 955 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. 
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);

// Stack-slot to stack-slot vector copy.  Small vectors (VecS/VecD) go via
// PUSH/POP pairs; larger vectors (VecX/VecY/VecZ) bounce through xmm0,
// saving and restoring xmm0 below ESP.  calc_size is computed up front and
// asserted against the bytes actually emitted.
static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
                                     int dst_offset, uint ireg, outputStream* st) {
  int calc_size = 0;
  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
  switch (ireg) {
  case Op_VecS: // copy whole register
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    break;
  case Op_VecD: {
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    // Second PUSH/POP pair moves the high 32 bits; its displacements may
    // need a different encoding size than the first pair's.
    int tmp_src_offset = src_offset + 4;
    int tmp_dst_offset = dst_offset + 4;
    src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
    dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
    calc_size += 3+src_offset_size + 3+dst_offset_size;
    break;
  }
  case Op_VecX:
  case Op_VecY:
  case Op_VecZ:
    // save/restore of xmm0 (6+6) plus the two spill moves.
    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
    break;
  default:
    ShouldNotReachHere();
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX:
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      __ evmovdqul(Address(rsp, -64), xmm0, 2);
      __ evmovdqul(xmm0, Address(rsp, src_offset), 2);
      __ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdqul(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
    assert(size == calc_size, "incorrect size calculation");
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  return calc_size;
}

// Central spill-copy dispatcher.  Classifies source and destination
// (first and second halves independently, for 64-bit values split across
// two 32-bit homes) and delegates to the impl_* helpers.  Used three ways:
//   cbuf != NULL              -> emit the instructions
//   cbuf == NULL, !do_size    -> format for debug printing
//   cbuf == NULL, do_size     -> compute the code size in bytes
// The returned size must exactly match what emission produces.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // Vector copies are handled entirely by the vec_* helpers.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move. push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Copy the high half first so the PUSH of the low half does not read
      // an already-overwritten slot.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                         st->print(    "FST %s",                            Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );             // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Materialize the address of this node's stack slot with LEA (which, unlike
// ADD, does not clobber the flags).  disp32 form for offsets >= 128,
// disp8 form otherwise; size() below must agree.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7; // LEA with disp32
  }
  else {
    return 4; // LEA with disp8
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: inline-cache check.  Compares the expected klass
// (in EAX) against the receiver's klass (ECX) and jumps to the IC-miss stub
// on mismatch.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}

// This is UltraSparc specific, true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On 86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 1; }

// No CMOVF/CMOVD with SSE/SSE2
const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Compressed oops do not exist on the 32-bit VM, so these queries must
// never be reached here.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Rewrite the memory operand of an implicit-null-check candidate into its
// *_win95_safe variant.  Walks the node's operand list to find the operand
// covering input edge 'idx', then replaces it in place; operand kinds that
// are already safe (or that never use EBP as the address base) are left
// untouched.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped = node->oper_input_base();   // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1;                           // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                                // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
// divmodL is not implemented on 32-bit x86; these must never be queried.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// Recognizes (AndL x con) with a 32-bit mask constant, and 32-bit-range
// ConL constants.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams. Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword. There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
// operand to generate a function which returns its register number when
// queried. CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried. MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried. COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding. They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode. Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block. Emit functions will live in the
  // main source block for now. In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (16-bit operand size).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32 ,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
1659 // 1660 // normal case special case 1661 // 1662 // input : rax,: dividend min_int 1663 // reg: divisor -1 1664 // 1665 // output: rax,: quotient (= rax, idiv reg) min_int 1666 // rdx: remainder (= rax, irem reg) 0 1667 // 1668 // Code sequnce: 1669 // 1670 // 81 F8 00 00 00 80 cmp rax,80000000h 1671 // 0F 85 0B 00 00 00 jne normal_case 1672 // 33 D2 xor rdx,edx 1673 // 83 F9 FF cmp rcx,0FFh 1674 // 0F 84 03 00 00 00 je done 1675 // normal_case: 1676 // 99 cdq 1677 // F7 F9 idiv rax,ecx 1678 // done: 1679 // 1680 emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8); 1681 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); 1682 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h 1683 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85); 1684 emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00); 1685 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case 1686 emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx 1687 emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh 1688 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84); 1689 emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00); 1690 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done 1691 // normal_case: 1692 emit_opcode(cbuf,0x99); // cdq 1693 // idiv (note: must be emitted by the user of this rule) 1694 // normal: 1695 %} 1696 1697 // Dense encoding for older common ops 1698 enc_class Opc_plus(immI opcode, rRegI reg) %{ 1699 emit_opcode(cbuf, $opcode$$constant + $reg$$reg); 1700 %} 1701 1702 1703 // Opcde enc_class for 8/32 bit immediate instructions with sign-extension 1704 enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit 1705 // Check for 8-bit immediate, and set sign extend bit in opcode 1706 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1707 emit_opcode(cbuf, $primary | 0x02); 1708 } 1709 else { // If 32-bit immediate 1710 emit_opcode(cbuf, $primary); 1711 } 1712 %} 1713 1714 enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m 1715 // Emit primary opcode and 
set sign-extend bit 1716 // Check for 8-bit immediate, and set sign extend bit in opcode 1717 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1718 emit_opcode(cbuf, $primary | 0x02); } 1719 else { // If 32-bit immediate 1720 emit_opcode(cbuf, $primary); 1721 } 1722 // Emit r/m byte with secondary opcode, after primary opcode. 1723 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1724 %} 1725 1726 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1727 // Check for 8-bit immediate, and set sign extend bit in opcode 1728 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1729 $$$emit8$imm$$constant; 1730 } 1731 else { // If 32-bit immediate 1732 // Output immediate 1733 $$$emit32$imm$$constant; 1734 } 1735 %} 1736 1737 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1738 // Emit primary opcode and set sign-extend bit 1739 // Check for 8-bit immediate, and set sign extend bit in opcode 1740 int con = (int)$imm$$constant; // Throw away top bits 1741 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1742 // Emit r/m byte with secondary opcode, after primary opcode. 1743 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1744 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1745 else emit_d32(cbuf,con); 1746 %} 1747 1748 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1749 // Emit primary opcode and set sign-extend bit 1750 // Check for 8-bit immediate, and set sign extend bit in opcode 1751 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1752 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1753 // Emit r/m byte with tertiary opcode, after primary opcode. 
1754 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1755 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1756 else emit_d32(cbuf,con); 1757 %} 1758 1759 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1760 emit_cc(cbuf, $secondary, $dst$$reg ); 1761 %} 1762 1763 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1764 int destlo = $dst$$reg; 1765 int desthi = HIGH_FROM_LOW(destlo); 1766 // bswap lo 1767 emit_opcode(cbuf, 0x0F); 1768 emit_cc(cbuf, 0xC8, destlo); 1769 // bswap hi 1770 emit_opcode(cbuf, 0x0F); 1771 emit_cc(cbuf, 0xC8, desthi); 1772 // xchg lo and hi 1773 emit_opcode(cbuf, 0x87); 1774 emit_rm(cbuf, 0x3, destlo, desthi); 1775 %} 1776 1777 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1778 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1779 %} 1780 1781 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1782 $$$emit8$primary; 1783 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1784 %} 1785 1786 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1787 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1788 emit_d8(cbuf, op >> 8 ); 1789 emit_d8(cbuf, op & 255); 1790 %} 1791 1792 // emulate a CMOV with a conditional branch around a MOV 1793 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1794 // Invert sense of branch from sense of CMOV 1795 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1796 emit_d8( cbuf, $brOffs$$constant ); 1797 %} 1798 1799 enc_class enc_PartialSubtypeCheck( ) %{ 1800 Register Redi = as_Register(EDI_enc); // result register 1801 Register Reax = as_Register(EAX_enc); // super class 1802 Register Recx = as_Register(ECX_enc); // killed 1803 Register Resi = as_Register(ESI_enc); // sub class 1804 Label miss; 1805 1806 MacroAssembler _masm(&cbuf); 1807 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1808 NULL, &miss, 1809 /*set_cond_codes:*/ true); 1810 if ($primary) { 1811 __ xorptr(Redi, Redi); 1812 } 1813 __ bind(miss); 1814 %} 1815 1816 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1817 
MacroAssembler masm(&cbuf); 1818 int start = masm.offset(); 1819 if (UseSSE >= 2) { 1820 if (VerifyFPU) { 1821 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1822 } 1823 } else { 1824 // External c_calling_convention expects the FPU stack to be 'clean'. 1825 // Compiled code leaves it dirty. Do cleanup now. 1826 masm.empty_FPU_stack(); 1827 } 1828 if (sizeof_FFree_Float_Stack_All == -1) { 1829 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1830 } else { 1831 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1832 } 1833 %} 1834 1835 enc_class Verify_FPU_For_Leaf %{ 1836 if( VerifyFPU ) { 1837 MacroAssembler masm(&cbuf); 1838 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1839 } 1840 %} 1841 1842 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1843 // This is the instruction starting address for relocation info. 1844 cbuf.set_insts_mark(); 1845 $$$emit8$primary; 1846 // CALL directly to the runtime 1847 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1848 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1849 1850 if (UseSSE >= 2) { 1851 MacroAssembler _masm(&cbuf); 1852 BasicType rt = tf()->return_type(); 1853 1854 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1855 // A C runtime call where the return value is unused. In SSE2+ 1856 // mode the result needs to be removed from the FPU stack. It's 1857 // likely that this function call could be removed by the 1858 // optimizer if the C function is a pure function. 
1859 __ ffree(0); 1860 } else if (rt == T_FLOAT) { 1861 __ lea(rsp, Address(rsp, -4)); 1862 __ fstp_s(Address(rsp, 0)); 1863 __ movflt(xmm0, Address(rsp, 0)); 1864 __ lea(rsp, Address(rsp, 4)); 1865 } else if (rt == T_DOUBLE) { 1866 __ lea(rsp, Address(rsp, -8)); 1867 __ fstp_d(Address(rsp, 0)); 1868 __ movdbl(xmm0, Address(rsp, 0)); 1869 __ lea(rsp, Address(rsp, 8)); 1870 } 1871 } 1872 %} 1873 1874 1875 enc_class pre_call_resets %{ 1876 // If method sets FPU control word restore it here 1877 debug_only(int off0 = cbuf.insts_size()); 1878 if (ra_->C->in_24_bit_fp_mode()) { 1879 MacroAssembler _masm(&cbuf); 1880 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1881 } 1882 if (ra_->C->max_vector_size() > 16) { 1883 // Clear upper bits of YMM registers when current compiled code uses 1884 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1885 MacroAssembler _masm(&cbuf); 1886 __ vzeroupper(); 1887 } 1888 debug_only(int off1 = cbuf.insts_size()); 1889 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1890 %} 1891 1892 enc_class post_call_FPU %{ 1893 // If method sets FPU control word do it here also 1894 if (Compile::current()->in_24_bit_fp_mode()) { 1895 MacroAssembler masm(&cbuf); 1896 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1897 } 1898 %} 1899 1900 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1901 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1902 // who we intended to call. 1903 cbuf.set_insts_mark(); 1904 $$$emit8$primary; 1905 1906 if (!_method) { 1907 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1908 runtime_call_Relocation::spec(), 1909 RELOC_IMM32); 1910 } else { 1911 int method_index = resolved_method_index(cbuf); 1912 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1913 : static_call_Relocation::spec(method_index); 1914 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1915 rspec, RELOC_DISP32); 1916 // Emit stubs for static call. 1917 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1918 if (stub == NULL) { 1919 ciEnv::current()->record_failure("CodeCache is full"); 1920 return; 1921 } 1922 } 1923 %} 1924 1925 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1926 MacroAssembler _masm(&cbuf); 1927 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1928 %} 1929 1930 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1931 int disp = in_bytes(Method::from_compiled_offset()); 1932 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1933 1934 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1935 cbuf.set_insts_mark(); 1936 $$$emit8$primary; 1937 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1938 emit_d8(cbuf, disp); // Displacement 1939 1940 %} 1941 1942 // Following encoding is no longer used, but may be restored if calling 1943 // convention changes significantly. 
1944 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1945 // 1946 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1947 // // int ic_reg = Matcher::inline_cache_reg(); 1948 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1949 // // int imo_reg = Matcher::interpreter_method_oop_reg(); 1950 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1951 // 1952 // // // Interpreter expects method_oop in EBX, currently a callee-saved register, 1953 // // // so we load it immediately before the call 1954 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop 1955 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1956 // 1957 // // xor rbp,ebp 1958 // emit_opcode(cbuf, 0x33); 1959 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1960 // 1961 // // CALL to interpreter. 1962 // cbuf.set_insts_mark(); 1963 // $$$emit8$primary; 1964 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1965 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1966 // %} 1967 1968 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1969 $$$emit8$primary; 1970 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1971 $$$emit8$shift$$constant; 1972 %} 1973 1974 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1975 // Load immediate does not have a zero or sign extended version 1976 // for 8-bit immediates 1977 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1978 $$$emit32$src$$constant; 1979 %} 1980 1981 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1982 // Load immediate does not have a zero or sign extended version 1983 // for 8-bit immediates 1984 emit_opcode(cbuf, $primary + $dst$$reg); 1985 $$$emit32$src$$constant; 1986 %} 1987 1988 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1989 // Load immediate does not have a zero or sign extended version 1990 // for 8-bit immediates 1991 int dst_enc = $dst$$reg; 1992 int src_con = $src$$constant & 0x0FFFFFFFFL; 1993 if (src_con == 0) { 1994 // xor dst, dst 
1995 emit_opcode(cbuf, 0x33); 1996 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1997 } else { 1998 emit_opcode(cbuf, $primary + dst_enc); 1999 emit_d32(cbuf, src_con); 2000 } 2001 %} 2002 2003 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 2004 // Load immediate does not have a zero or sign extended version 2005 // for 8-bit immediates 2006 int dst_enc = $dst$$reg + 2; 2007 int src_con = ((julong)($src$$constant)) >> 32; 2008 if (src_con == 0) { 2009 // xor dst, dst 2010 emit_opcode(cbuf, 0x33); 2011 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 2012 } else { 2013 emit_opcode(cbuf, $primary + dst_enc); 2014 emit_d32(cbuf, src_con); 2015 } 2016 %} 2017 2018 2019 // Encode a reg-reg copy. If it is useless, then empty encoding. 2020 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 2021 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2022 %} 2023 2024 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 2025 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2026 %} 2027 2028 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 2029 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2030 %} 2031 2032 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 2033 $$$emit8$primary; 2034 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2035 %} 2036 2037 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 2038 $$$emit8$secondary; 2039 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2040 %} 2041 2042 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 2043 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2044 %} 2045 2046 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 2047 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2048 %} 2049 2050 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 2051 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); 2052 %} 2053 2054 enc_class Con32 (immI src) %{ // Con32(storeImmI) 2055 // Output immediate 2056 $$$emit32$src$$constant; 2057 %} 2058 2059 enc_class Con32FPR_as_bits(immFPR src) 
%{ // storeF_imm 2060 // Output Float immediate bits 2061 jfloat jf = $src$$constant; 2062 int jf_as_bits = jint_cast( jf ); 2063 emit_d32(cbuf, jf_as_bits); 2064 %} 2065 2066 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 2067 // Output Float immediate bits 2068 jfloat jf = $src$$constant; 2069 int jf_as_bits = jint_cast( jf ); 2070 emit_d32(cbuf, jf_as_bits); 2071 %} 2072 2073 enc_class Con16 (immI src) %{ // Con16(storeImmI) 2074 // Output immediate 2075 $$$emit16$src$$constant; 2076 %} 2077 2078 enc_class Con_d32(immI src) %{ 2079 emit_d32(cbuf,$src$$constant); 2080 %} 2081 2082 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2083 // Output immediate memory reference 2084 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2085 emit_d32(cbuf, 0x00); 2086 %} 2087 2088 enc_class lock_prefix( ) %{ 2089 if( os::is_MP() ) 2090 emit_opcode(cbuf,0xF0); // [Lock] 2091 %} 2092 2093 // Cmp-xchg long value. 2094 // Note: we need to swap rbx, and rcx before and after the 2095 // cmpxchg8 instruction because the instruction uses 2096 // rcx as the high order word of the new value to store but 2097 // our register encoding uses rbx,. 
// 8-byte compare-and-exchange; EBX/ECX are exchanged around the instruction
// (see the note above the enc_class in the file).
enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
  // [Lock]
  if( os::is_MP() )
    emit_opcode(cbuf,0xF0);
  // CMPXCHG8 [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xC7);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
%}

enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
  // [Lock]
  if( os::is_MP() )
    emit_opcode(cbuf,0xF0);

  // CMPXCHG [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// Materialize ZF!=0 as a 0/1 boolean without disturbing the flags.
enc_class enc_flags_ne_to_boolean( iRegI res ) %{
  int res_encoding = $res$$reg;

  // MOV  res,0
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 0 );
  // JNE,s  fail
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 5 );                  // 5 == size of the MOV below (opcode + imm32)
  // MOV  res,1
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 1 );
  // fail:
%}

enc_class set_instruction_start( ) %{
  cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
%}

enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = $ereg$$reg;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc();
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp + 4;      // Offset is 4 further in memory
  assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
%}

// Double-precision shift (SHLD/SHRD) of a long by 1..31; $tertiary selects
// the direction and thereby which half is the destination of the SHxD.
enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
  int r1, r2;
  if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,$tertiary);
  emit_rm(cbuf, 0x3, r1, r2);
  emit_d8(cbuf,$cnt$$constant);
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, r1);
  emit_d8(cbuf,$cnt$$constant);
%}

// Arithmetic right shift of a long by 32..63.
enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
  emit_opcode( cbuf, 0x8B ); // Move
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
  emit_d8(cbuf,31);
%}

// Logical shift (left or right; $secondary selects) of a long by 32..63;
// the vacated half is cleared with XOR.
enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
  int r1, r2;
  if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

  emit_opcode( cbuf, 0x8B ); // Move r1,r2
  emit_rm(cbuf, 0x3, r1, r2);
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_opcode(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_opcode(cbuf,0x33);  // XOR r2,r2
  emit_rm(cbuf, 0x3, r2, r2);
%}

// Clone of RegMem but accepts an extra parameter to access each
// half of a double in memory; it never needs relocation info.
// Move one 32-bit half of a double in memory to/from a register;
// disp_for_half selects which half (0 = low word, 4 = high word).
enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
  emit_opcode(cbuf,$opcode$$constant);
  int reg_encoding = $rm_reg$$reg;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp + $disp_for_half$$constant;
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
//
// Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
// and it never needs relocation information.
// Frequently used to move data between FPU's Stack Top and memory.
enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
%}

enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
  int reg_encoding = $dst$$reg;
  int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
  int index        = 0x04;            // 0x04 indicates no index
  int scale        = 0x00;            // 0x00 indicates no scale
  int displace     = $src1$$constant; // 0x00 indicates no displacement
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst < src around move (2 == length of the MOV below)
  emit_opcode(cbuf,0x7C);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst > src around move (2 == length of the MOV below)
  emit_opcode(cbuf,0x7F);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class enc_FPR_store(memory mem, regDPR src) %{
  // If src is FPR1, we can just FST to store it.
  // Else we need to FLD it to FPR1, then FSTP to store/pop it.
  int reg_encoding = 0x2; // Just store
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  if( $src$$reg != FPR1L_enc ) {
    reg_encoding = 0x3;  // Store & pop
    emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
  }
  cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
  emit_opcode(cbuf,$primary);
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class neg_reg(rRegI dst) %{
  // NEG $dst
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
%}

enc_class setLT_reg(eCXRegI dst) %{
  // SETLT $dst
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0x9C);
  emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
%}

// Branch-free conditional add: p += (p < q) ? y : 0, via SBB mask.
enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
  int tmpReg = $tmp$$reg;

  // SUB $p,$q
  emit_opcode(cbuf,0x2B);
  emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
  // SBB $tmp,$tmp
  emit_opcode(cbuf,0x1B);
  emit_rm(cbuf, 0x3, tmpReg, tmpReg);
  // AND $tmp,$y
  emit_opcode(cbuf,0x23);
  emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
  // ADD $p,$tmp
  emit_opcode(cbuf,0x03);
  emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
%}

// Variable shift of a long left by ECX; shifts >= 32 are handled by first
// moving the low half into the high half and clearing the low half.
enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.hi,$dst.lo
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // CLR $dst.lo
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
  // small:
  // SHLD $dst.hi,$dst.lo,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xA5);
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  // SHL $dst.lo,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
%}

// Variable logical shift of a long right by ECX (mirror of shift_left_long).
enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // CLR $dst.hi
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
  // small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SHR $dst.hi,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
%}

// Variable arithmetic shift of a long right by ECX; the high half is
// sign-filled with SAR 31 for shifts >= 32.
enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x05);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // SAR $dst.hi,31
  emit_opcode(cbuf, 0xC1);
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
  emit_d8(cbuf, 0x1F );
  // small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SAR $dst.hi,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
%}


// ----------------- Encodings for floating point unit -----------------
// May leave result in FPU-TOS or FPU reg depending on opcodes
enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $src$$reg );
%}

// Pop argument in FPR0 with FSTP ST(0)
enc_class PopFPU() %{
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, 0xD8 );
%}

// !!!!!  equivalent to Pop_Reg_F
enc_class Pop_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class Push_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
%}

// Multiply TOS by a subnormal-bias constant (strictfp scaling, step 1).
enc_class strictfp_bias1( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Multiply TOS by the inverse subnormal-bias constant (strictfp scaling, step 2).
enc_class strictfp_bias2( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Special case for moving an integer register to a stack slot.
enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
%}

// Special case for moving a register to a stack slot.
// Store a register into [ESP+disp]; the opcode byte is emitted by the caller.
enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  // Opcode already emitted
  emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
  emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
  emit_d32(cbuf, $dst$$disp);                  // Displacement
%}

// Push the integer in stackSlot 'src' onto FP-stack
enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
  store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
%}

// Push FPU's TOS float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
  store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
%}

// Same as Pop_Mem_F except for opcode
// Push FPU's TOS double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
  store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
%}

enc_class Pop_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class Push_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xD9 );           // FLD ST(i-1)
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
%}

// Push FPU's float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
  int pop = 0x02;                      // FST (no pop) when src is already TOS
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;                        // FSTP: pop the extra copy we pushed
  }
  store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
%}

// Push FPU's double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
  int pop = 0x02;                      // FST (no pop) when src is already TOS
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;                        // FSTP: pop the extra copy we pushed
  }
  store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
%}

// Push FPU's double to a FPU-stack-slot, and pop FPU-stack
enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
  int pop = 0xD0 - 1; // -1 since we skip FLD
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD ST(src-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0xD8;
  }
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
%}


// Load dst into FPR0; if src is not FPR1, rotate it into place with
// fincstp / FXCH / fdecstp so a following op sees src at ST(1).
enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
  // load dst in FPR0
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // swap src with FPR1:
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
%}

// Push two XMM doubles onto the x87 stack via an 8-byte stack temp
// (src1 first, so src0 ends up at TOS).
enc_class Push_ModD_encoding(regD src0, regD src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_d(Address(rsp, 0));
  __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Push two XMM floats onto the x87 stack via a 4-byte stack temp.
enc_class Push_ModF_encoding(regF src0, regF src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 4);
  __ movflt(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_s(Address(rsp, 0));
  __ movflt(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_s(Address(rsp, 0));
%}

// Pop the x87 TOS double into an XMM register and release the stack temp.
enc_class Push_ResultD(regD dst) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_d(Address(rsp, 0));
  __ movdbl($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, 8);
%}

// Pop the x87 TOS float into an XMM register; d8 is the stack-temp size to free.
enc_class Push_ResultF(regF dst, immI d8) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_s(Address(rsp, 0));
  __ movflt($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, $d8$$constant);
%}

enc_class Push_SrcD(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

enc_class push_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
%}

enc_class pop_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ addptr(rsp, 8);
%}

// Assumes an 8-byte stack temp is already reserved (see push_stack_temp_qword).
enc_class push_xmm_to_fpr1(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Rotate src into FPR1 (fincstp/FXCH/fdecstp) so a following pop encoding
// can store it; the actual store is done by Pop_Reg_F / Pop_Mem_F.
enc_class Push_Result_Mod_DPR( regDPR src) %{
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
  // // following asm replaced with Pop_Reg_F or Pop_Mem_F
  // // FSTP   FPR$dst$$reg
  // emit_opcode( cbuf, 0xDD );
  // emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class fnstsw_sahf_skip_parity() %{
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jnp  ::skip
  emit_opcode( cbuf, 0x7B );
  emit_opcode( cbuf, 0x05 );
%}

enc_class emitModDPR() %{
  // fprem must be iterative
  // :: loop
  // fprem
  emit_opcode( cbuf, 0xD9 );
  emit_opcode( cbuf, 0xF8 );
  // wait
  emit_opcode( cbuf, 0x9b );
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jp ::loop
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0x8A );
  emit_opcode( cbuf, 0xF4 );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
%}

// Transfer x87 compare result into EFLAGS; an unordered result (C2 set,
// 0x0400 of the status word) is forced to the LT case.
enc_class fpu_flags() %{
  // fnstsw_ax
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // test ax,0x0400
  emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
  emit_opcode( cbuf, 0xA9 );
  emit_d16   ( cbuf, 0x0400 );
  // // // This sequence works, but stalls for 12-16 cycles on PPro
  // // test rax,0x0400
  // emit_opcode( cbuf, 0xA9 );
  // emit_d32   ( cbuf, 0x00000400 );
  //
  // jz exit (no unordered comparison)
  emit_opcode( cbuf, 0x74 );
  emit_d8    ( cbuf, 0x02 );
  // mov ah,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8    ( cbuf, 0x01 );
  // sahf
  emit_opcode( cbuf, 0x9E);
%}

enc_class cmpF_P6_fixup() %{
  // Fixup the integer flags in case comparison involved a NaN
  //
  // JNP exit (no unordered comparison, P-flag is set by NaN)
  emit_opcode( cbuf, 0x7B );
  emit_d8    ( cbuf, 0x03 );
  // MOV ah,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8    ( cbuf, 0x01 );
  // SAHF
  emit_opcode( cbuf, 0x9E);
  // NOP     // target for branch to avoid branch to branch
  emit_opcode( cbuf, 0x90);
%}

// fnstsw_ax();
// sahf();
// movl(dst, nan_result);
// jcc(Assembler::parity, exit);
// movl(dst, less_result);
// jcc(Assembler::below, exit);
// movl(dst, equal_result);
// jcc(Assembler::equal, exit);
// movl(dst, greater_result);

// less_result     =  1;
// greater_result  = -1;
// equal_result    = 0;
// nan_result      = -1;

enc_class CmpF_Result(rRegI dst) %{
  // fnstsw_ax();
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // sahf
  emit_opcode( cbuf, 0x9E);
  // movl(dst, nan_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::parity, exit);
  emit_opcode( cbuf, 0x7A );
  emit_d8    ( cbuf, 0x13 );
  // movl(dst, less_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::below, exit);
  emit_opcode( cbuf, 0x72 );
2710 emit_d8 ( cbuf, 0x0C ); 2711 // movl(dst, equal_result); 2712 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2713 emit_d32( cbuf, 0 ); 2714 // jcc(Assembler::equal, exit); 2715 emit_opcode( cbuf, 0x74 ); 2716 emit_d8 ( cbuf, 0x05 ); 2717 // movl(dst, greater_result); 2718 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2719 emit_d32( cbuf, 1 ); 2720 %} 2721 2722 2723 // Compare the longs and set flags 2724 // BROKEN! Do Not use as-is 2725 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2726 // CMP $src1.hi,$src2.hi 2727 emit_opcode( cbuf, 0x3B ); 2728 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2729 // JNE,s done 2730 emit_opcode(cbuf,0x75); 2731 emit_d8(cbuf, 2 ); 2732 // CMP $src1.lo,$src2.lo 2733 emit_opcode( cbuf, 0x3B ); 2734 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2735 // done: 2736 %} 2737 2738 enc_class convert_int_long( regL dst, rRegI src ) %{ 2739 // mov $dst.lo,$src 2740 int dst_encoding = $dst$$reg; 2741 int src_encoding = $src$$reg; 2742 encode_Copy( cbuf, dst_encoding , src_encoding ); 2743 // mov $dst.hi,$src 2744 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2745 // sar $dst.hi,31 2746 emit_opcode( cbuf, 0xC1 ); 2747 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2748 emit_d8(cbuf, 0x1F ); 2749 %} 2750 2751 enc_class convert_long_double( eRegL src ) %{ 2752 // push $src.hi 2753 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2754 // push $src.lo 2755 emit_opcode(cbuf, 0x50+$src$$reg ); 2756 // fild 64-bits at [SP] 2757 emit_opcode(cbuf,0xdf); 2758 emit_d8(cbuf, 0x6C); 2759 emit_d8(cbuf, 0x24); 2760 emit_d8(cbuf, 0x00); 2761 // pop stack 2762 emit_opcode(cbuf, 0x83); // add SP, #8 2763 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2764 emit_d8(cbuf, 0x8); 2765 %} 2766 2767 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2768 // IMUL EDX:EAX,$src1 2769 emit_opcode( cbuf, 0xF7 ); 2770 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2771 // SAR 
EDX,$cnt-32 2772 int shift_count = ((int)$cnt$$constant) - 32; 2773 if (shift_count > 0) { 2774 emit_opcode(cbuf, 0xC1); 2775 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2776 emit_d8(cbuf, shift_count); 2777 } 2778 %} 2779 2780 // this version doesn't have add sp, 8 2781 enc_class convert_long_double2( eRegL src ) %{ 2782 // push $src.hi 2783 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2784 // push $src.lo 2785 emit_opcode(cbuf, 0x50+$src$$reg ); 2786 // fild 64-bits at [SP] 2787 emit_opcode(cbuf,0xdf); 2788 emit_d8(cbuf, 0x6C); 2789 emit_d8(cbuf, 0x24); 2790 emit_d8(cbuf, 0x00); 2791 %} 2792 2793 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2794 // Basic idea: long = (long)int * (long)int 2795 // IMUL EDX:EAX, src 2796 emit_opcode( cbuf, 0xF7 ); 2797 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2798 %} 2799 2800 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2801 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2802 // MUL EDX:EAX, src 2803 emit_opcode( cbuf, 0xF7 ); 2804 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2805 %} 2806 2807 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2808 // Basic idea: lo(result) = lo(x_lo * y_lo) 2809 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2810 // MOV $tmp,$src.lo 2811 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2812 // IMUL $tmp,EDX 2813 emit_opcode( cbuf, 0x0F ); 2814 emit_opcode( cbuf, 0xAF ); 2815 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2816 // MOV EDX,$src.hi 2817 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2818 // IMUL EDX,EAX 2819 emit_opcode( cbuf, 0x0F ); 2820 emit_opcode( cbuf, 0xAF ); 2821 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2822 // ADD $tmp,EDX 2823 emit_opcode( cbuf, 0x03 ); 2824 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2825 // MUL EDX:EAX,$src.lo 2826 emit_opcode( cbuf, 0xF7 ); 2827 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2828 // ADD EDX,ESI 2829 emit_opcode( 
cbuf, 0x03 ); 2830 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2831 %} 2832 2833 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2834 // Basic idea: lo(result) = lo(src * y_lo) 2835 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2836 // IMUL $tmp,EDX,$src 2837 emit_opcode( cbuf, 0x6B ); 2838 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2839 emit_d8( cbuf, (int)$src$$constant ); 2840 // MOV EDX,$src 2841 emit_opcode(cbuf, 0xB8 + EDX_enc); 2842 emit_d32( cbuf, (int)$src$$constant ); 2843 // MUL EDX:EAX,EDX 2844 emit_opcode( cbuf, 0xF7 ); 2845 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2846 // ADD EDX,ESI 2847 emit_opcode( cbuf, 0x03 ); 2848 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2849 %} 2850 2851 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2852 // PUSH src1.hi 2853 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2854 // PUSH src1.lo 2855 emit_opcode(cbuf, 0x50+$src1$$reg ); 2856 // PUSH src2.hi 2857 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2858 // PUSH src2.lo 2859 emit_opcode(cbuf, 0x50+$src2$$reg ); 2860 // CALL directly to the runtime 2861 cbuf.set_insts_mark(); 2862 emit_opcode(cbuf,0xE8); // Call into runtime 2863 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2864 // Restore stack 2865 emit_opcode(cbuf, 0x83); // add SP, #framesize 2866 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2867 emit_d8(cbuf, 4*4); 2868 %} 2869 2870 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2871 // PUSH src1.hi 2872 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2873 // PUSH src1.lo 2874 emit_opcode(cbuf, 0x50+$src1$$reg ); 2875 // PUSH src2.hi 2876 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2877 // PUSH src2.lo 2878 emit_opcode(cbuf, 0x50+$src2$$reg ); 2879 // CALL directly to the runtime 2880 cbuf.set_insts_mark(); 2881 emit_opcode(cbuf,0xE8); // Call into runtime 2882 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2883 // Restore stack 2884 emit_opcode(cbuf, 0x83); // add SP, #framesize 2885 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2886 emit_d8(cbuf, 4*4); 2887 %} 2888 2889 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2890 // MOV $tmp,$src.lo 2891 emit_opcode(cbuf, 0x8B); 2892 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2893 // OR $tmp,$src.hi 2894 emit_opcode(cbuf, 0x0B); 2895 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2896 %} 2897 2898 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2899 // CMP $src1.lo,$src2.lo 2900 emit_opcode( cbuf, 0x3B ); 2901 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2902 // JNE,s skip 2903 emit_cc(cbuf, 0x70, 0x5); 2904 emit_d8(cbuf,2); 2905 // CMP $src1.hi,$src2.hi 2906 emit_opcode( cbuf, 0x3B ); 2907 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2908 %} 2909 2910 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2911 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2912 emit_opcode( cbuf, 0x3B ); 2913 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2914 // MOV $tmp,$src1.hi 2915 emit_opcode( cbuf, 0x8B ); 2916 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2917 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2918 emit_opcode( cbuf, 0x1B ); 2919 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2920 %} 2921 2922 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2923 // XOR $tmp,$tmp 2924 emit_opcode(cbuf,0x33); // XOR 2925 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2926 // CMP $tmp,$src.lo 2927 emit_opcode( cbuf, 0x3B ); 2928 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2929 // SBB $tmp,$src.hi 2930 emit_opcode( cbuf, 0x1B ); 2931 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2932 %} 2933 2934 // Sniff, sniff... 
smells like Gnu Superoptimizer 2935 enc_class neg_long( eRegL dst ) %{ 2936 emit_opcode(cbuf,0xF7); // NEG hi 2937 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2938 emit_opcode(cbuf,0xF7); // NEG lo 2939 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2940 emit_opcode(cbuf,0x83); // SBB hi,0 2941 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2942 emit_d8 (cbuf,0 ); 2943 %} 2944 2945 enc_class enc_pop_rdx() %{ 2946 emit_opcode(cbuf,0x5A); 2947 %} 2948 2949 enc_class enc_rethrow() %{ 2950 cbuf.set_insts_mark(); 2951 emit_opcode(cbuf, 0xE9); // jmp entry 2952 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2953 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2954 %} 2955 2956 2957 // Convert a double to an int. Java semantics require we do complex 2958 // manglelations in the corner cases. So we set the rounding mode to 2959 // 'zero', store the darned double down as an int, and reset the 2960 // rounding mode to 'nearest'. The hardware throws an exception which 2961 // patches up the correct value directly to the stack. 2962 enc_class DPR2I_encoding( regDPR src ) %{ 2963 // Flip to round-to-zero mode. We attempted to allow invalid-op 2964 // exceptions here, so that a NAN or other corner-case value will 2965 // thrown an exception (but normal values get converted at full speed). 2966 // However, I2C adapters and other float-stack manglers leave pending 2967 // invalid-op exceptions hanging. We would have to clear them before 2968 // enabling them and that is more expensive than just testing for the 2969 // invalid value Intel stores down in the corner cases. 2970 emit_opcode(cbuf,0xD9); // FLDCW trunc 2971 emit_opcode(cbuf,0x2D); 2972 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 2973 // Allocate a word 2974 emit_opcode(cbuf,0x83); // SUB ESP,4 2975 emit_opcode(cbuf,0xEC); 2976 emit_d8(cbuf,0x04); 2977 // Encoding assumes a double has been pushed into FPR0. 
2978 // Store down the double as an int, popping the FPU stack 2979 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2980 emit_opcode(cbuf,0x1C); 2981 emit_d8(cbuf,0x24); 2982 // Restore the rounding mode; mask the exception 2983 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2984 emit_opcode(cbuf,0x2D); 2985 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2986 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 2987 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 2988 2989 // Load the converted int; adjust CPU stack 2990 emit_opcode(cbuf,0x58); // POP EAX 2991 emit_opcode(cbuf,0x3D); // CMP EAX,imm 2992 emit_d32 (cbuf,0x80000000); // 0x80000000 2993 emit_opcode(cbuf,0x75); // JNE around_slow_call 2994 emit_d8 (cbuf,0x07); // Size of slow_call 2995 // Push src onto stack slow-path 2996 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2997 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2998 // CALL directly to the runtime 2999 cbuf.set_insts_mark(); 3000 emit_opcode(cbuf,0xE8); // Call into runtime 3001 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3002 // Carry on here... 3003 %} 3004 3005 enc_class DPR2L_encoding( regDPR src ) %{ 3006 emit_opcode(cbuf,0xD9); // FLDCW trunc 3007 emit_opcode(cbuf,0x2D); 3008 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3009 // Allocate a word 3010 emit_opcode(cbuf,0x83); // SUB ESP,8 3011 emit_opcode(cbuf,0xEC); 3012 emit_d8(cbuf,0x08); 3013 // Encoding assumes a double has been pushed into FPR0. 3014 // Store down the double as a long, popping the FPU stack 3015 emit_opcode(cbuf,0xDF); // FISTP [ESP] 3016 emit_opcode(cbuf,0x3C); 3017 emit_d8(cbuf,0x24); 3018 // Restore the rounding mode; mask the exception 3019 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3020 emit_opcode(cbuf,0x2D); 3021 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3022 ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 3023 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3024 3025 // Load the converted int; adjust CPU stack 3026 emit_opcode(cbuf,0x58); // POP EAX 3027 emit_opcode(cbuf,0x5A); // POP EDX 3028 emit_opcode(cbuf,0x81); // CMP EDX,imm 3029 emit_d8 (cbuf,0xFA); // rdx 3030 emit_d32 (cbuf,0x80000000); // 0x80000000 3031 emit_opcode(cbuf,0x75); // JNE around_slow_call 3032 emit_d8 (cbuf,0x07+4); // Size of slow_call 3033 emit_opcode(cbuf,0x85); // TEST EAX,EAX 3034 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 3035 emit_opcode(cbuf,0x75); // JNE around_slow_call 3036 emit_d8 (cbuf,0x07); // Size of slow_call 3037 // Push src onto stack slow-path 3038 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3039 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3040 // CALL directly to the runtime 3041 cbuf.set_insts_mark(); 3042 emit_opcode(cbuf,0xE8); // Call into runtime 3043 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3044 // Carry on here... 
3045 %} 3046 3047 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 3048 // Operand was loaded from memory into fp ST (stack top) 3049 // FMUL ST,$src /* D8 C8+i */ 3050 emit_opcode(cbuf, 0xD8); 3051 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3052 %} 3053 3054 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3055 // FADDP ST,src2 /* D8 C0+i */ 3056 emit_opcode(cbuf, 0xD8); 3057 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3058 //could use FADDP src2,fpST /* DE C0+i */ 3059 %} 3060 3061 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3062 // FADDP src2,ST /* DE C0+i */ 3063 emit_opcode(cbuf, 0xDE); 3064 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3065 %} 3066 3067 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3068 // Operand has been loaded into fp ST (stack top) 3069 // FSUB ST,$src1 3070 emit_opcode(cbuf, 0xD8); 3071 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3072 3073 // FDIV 3074 emit_opcode(cbuf, 0xD8); 3075 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3076 %} 3077 3078 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3079 // Operand was loaded from memory into fp ST (stack top) 3080 // FADD ST,$src /* D8 C0+i */ 3081 emit_opcode(cbuf, 0xD8); 3082 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3083 3084 // FMUL ST,src2 /* D8 C*+i */ 3085 emit_opcode(cbuf, 0xD8); 3086 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3087 %} 3088 3089 3090 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3091 // Operand was loaded from memory into fp ST (stack top) 3092 // FADD ST,$src /* D8 C0+i */ 3093 emit_opcode(cbuf, 0xD8); 3094 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3095 3096 // FMULP src2,ST /* DE C8+i */ 3097 emit_opcode(cbuf, 0xDE); 3098 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3099 %} 3100 3101 // Atomically load the volatile long 3102 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3103 emit_opcode(cbuf,0xDF); 3104 int rm_byte_opcode = 0x05; 3105 int base = $mem$$base; 3106 int index = $mem$$index; 3107 int scale = $mem$$scale; 3108 int displace = $mem$$disp; 3109 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3110 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3111 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3112 %} 3113 3114 // Volatile Store Long. Must be atomic, so move it into 3115 // the FP TOS and then do a 64-bit FIST. Has to probe the 3116 // target address before the store (for null-ptr checks) 3117 // so the memory operand is used twice in the encoding. 3118 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3119 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3120 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3121 emit_opcode(cbuf,0xDF); 3122 int rm_byte_opcode = 0x07; 3123 int base = $mem$$base; 3124 int index = $mem$$index; 3125 int scale = $mem$$scale; 3126 int displace = $mem$$disp; 3127 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3128 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3129 %} 3130 3131 // Safepoint Poll. This polls the safepoint page, and causes an 3132 // exception if it is not readable. Unfortunately, it kills the condition code 3133 // in the process 3134 // We current use TESTL [spp],EDI 3135 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 3136 3137 enc_class Safepoint_Poll() %{ 3138 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); 3139 emit_opcode(cbuf,0x85); 3140 emit_rm (cbuf, 0x0, 0x7, 0x5); 3141 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 3142 %} 3143 %} 3144 3145 3146 //----------FRAME-------------------------------------------------------------- 3147 // Definition of frame structure and management information. 
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   | (to get allocators register number
//  G  Owned by |        |           v    add OptoReg::stack0())
//  r   CALLER  |        |
//  o     |     +--------+      pad to even-align allocators stack-slot
//  w     V     |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^     |   in   |  5
//        |     |  args  |  4   Holes in incoming args owned by SELF
//        |     |        |  3
//        |     +--------+
//        V     | old out|      Empty on Intel, window on Sparc
//        | old |preserve|      Must be even aligned.
//        | SP-+--------+----> Matcher::_old_SP, even aligned
//        |     |   in   |  3   area for Intel ret address
//  Owned by    |preserve|      Empty on Sparc.
//   SELF       +--------+
//        |     |  pad2  |  2   pad to align old SP
//        |     +--------+  1
//        |     | locks  |  0
//        |     +--------+----> OptoReg::stack0(), even aligned
//        |     |  pad1  | 11   pad to align new SP
//        |     +--------+
//        |     |        | 10
//        |     | spills |  9   spills
//        V     |        |  8   (pad0 slot for callee)
//  -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^     |  out   |  7
//        |     |  args  |  6   Holes in outgoing args owned by CALLEE
//  Owned by    +--------+
//   CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        | new |preserve|      Must be even-aligned.
//        | SP-+--------+----> Matcher::_new_SP, even aligned
//        |     |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // NOTE(review): here Op_RegF only needs UseSSE>=1 while c_return_value
  // above requires UseSSE>=2 for floats — presumably because the C ABI
  // returns floats on the x87 stack unless SSE2 is available; verify.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate that fits in a sign-extended 8-bit opcode field
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate that fits in a signed 16-bit field
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count within a single 32-bit half
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long shift count that crosses the 32-bit half boundary
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value fits in a signed 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero (x87 / non-SSE2 form)
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 form)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  // (jlong_cast == 0 also excludes -0.0, whose sign bit is set.)
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE form)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
// (the four byte-addressable registers EAX/EBX/ECX/EDX)
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand
eCXRegI(xRegI reg) %{ 3724 constraint(ALLOC_IN_RC(ecx_reg)); 3725 match(reg); 3726 match(rRegI); 3727 3728 format %{ "ECX" %} 3729 interface(REG_INTER); 3730 %} 3731 3732 operand eDXRegI(xRegI reg) %{ 3733 constraint(ALLOC_IN_RC(edx_reg)); 3734 match(reg); 3735 match(rRegI); 3736 3737 format %{ "EDX" %} 3738 interface(REG_INTER); 3739 %} 3740 3741 operand eDIRegI(xRegI reg) %{ 3742 constraint(ALLOC_IN_RC(edi_reg)); 3743 match(reg); 3744 match(rRegI); 3745 3746 format %{ "EDI" %} 3747 interface(REG_INTER); 3748 %} 3749 3750 operand naxRegI() %{ 3751 constraint(ALLOC_IN_RC(nax_reg)); 3752 match(RegI); 3753 match(eCXRegI); 3754 match(eDXRegI); 3755 match(eSIRegI); 3756 match(eDIRegI); 3757 3758 format %{ %} 3759 interface(REG_INTER); 3760 %} 3761 3762 operand nadxRegI() %{ 3763 constraint(ALLOC_IN_RC(nadx_reg)); 3764 match(RegI); 3765 match(eBXRegI); 3766 match(eCXRegI); 3767 match(eSIRegI); 3768 match(eDIRegI); 3769 3770 format %{ %} 3771 interface(REG_INTER); 3772 %} 3773 3774 operand ncxRegI() %{ 3775 constraint(ALLOC_IN_RC(ncx_reg)); 3776 match(RegI); 3777 match(eAXRegI); 3778 match(eDXRegI); 3779 match(eSIRegI); 3780 match(eDIRegI); 3781 3782 format %{ %} 3783 interface(REG_INTER); 3784 %} 3785 3786 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3787 // // 3788 operand eSIRegI(xRegI reg) %{ 3789 constraint(ALLOC_IN_RC(esi_reg)); 3790 match(reg); 3791 match(rRegI); 3792 3793 format %{ "ESI" %} 3794 interface(REG_INTER); 3795 %} 3796 3797 // Pointer Register 3798 operand anyRegP() %{ 3799 constraint(ALLOC_IN_RC(any_reg)); 3800 match(RegP); 3801 match(eAXRegP); 3802 match(eBXRegP); 3803 match(eCXRegP); 3804 match(eDIRegP); 3805 match(eRegP); 3806 3807 format %{ %} 3808 interface(REG_INTER); 3809 %} 3810 3811 operand eRegP() %{ 3812 constraint(ALLOC_IN_RC(int_reg)); 3813 match(RegP); 3814 match(eAXRegP); 3815 match(eBXRegP); 3816 match(eCXRegP); 3817 match(eDIRegP); 3818 3819 format %{ %} 3820 interface(REG_INTER); 3821 %} 3822 3823 // 
On windows95, EBP is not safe to use for implicit null tests. 3824 operand eRegP_no_EBP() %{ 3825 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3826 match(RegP); 3827 match(eAXRegP); 3828 match(eBXRegP); 3829 match(eCXRegP); 3830 match(eDIRegP); 3831 3832 op_cost(100); 3833 format %{ %} 3834 interface(REG_INTER); 3835 %} 3836 3837 operand naxRegP() %{ 3838 constraint(ALLOC_IN_RC(nax_reg)); 3839 match(RegP); 3840 match(eBXRegP); 3841 match(eDXRegP); 3842 match(eCXRegP); 3843 match(eSIRegP); 3844 match(eDIRegP); 3845 3846 format %{ %} 3847 interface(REG_INTER); 3848 %} 3849 3850 operand nabxRegP() %{ 3851 constraint(ALLOC_IN_RC(nabx_reg)); 3852 match(RegP); 3853 match(eCXRegP); 3854 match(eDXRegP); 3855 match(eSIRegP); 3856 match(eDIRegP); 3857 3858 format %{ %} 3859 interface(REG_INTER); 3860 %} 3861 3862 operand pRegP() %{ 3863 constraint(ALLOC_IN_RC(p_reg)); 3864 match(RegP); 3865 match(eBXRegP); 3866 match(eDXRegP); 3867 match(eSIRegP); 3868 match(eDIRegP); 3869 3870 format %{ %} 3871 interface(REG_INTER); 3872 %} 3873 3874 // Special Registers 3875 // Return a pointer value 3876 operand eAXRegP(eRegP reg) %{ 3877 constraint(ALLOC_IN_RC(eax_reg)); 3878 match(reg); 3879 format %{ "EAX" %} 3880 interface(REG_INTER); 3881 %} 3882 3883 // Used in AtomicAdd 3884 operand eBXRegP(eRegP reg) %{ 3885 constraint(ALLOC_IN_RC(ebx_reg)); 3886 match(reg); 3887 format %{ "EBX" %} 3888 interface(REG_INTER); 3889 %} 3890 3891 // Tail-call (interprocedural jump) to interpreter 3892 operand eCXRegP(eRegP reg) %{ 3893 constraint(ALLOC_IN_RC(ecx_reg)); 3894 match(reg); 3895 format %{ "ECX" %} 3896 interface(REG_INTER); 3897 %} 3898 3899 operand eSIRegP(eRegP reg) %{ 3900 constraint(ALLOC_IN_RC(esi_reg)); 3901 match(reg); 3902 format %{ "ESI" %} 3903 interface(REG_INTER); 3904 %} 3905 3906 // Used in rep stosw 3907 operand eDIRegP(eRegP reg) %{ 3908 constraint(ALLOC_IN_RC(edi_reg)); 3909 match(reg); 3910 format %{ "EDI" %} 3911 interface(REG_INTER); 3912 %} 3913 3914 operand eRegL() %{ 
3915 constraint(ALLOC_IN_RC(long_reg)); 3916 match(RegL); 3917 match(eADXRegL); 3918 3919 format %{ %} 3920 interface(REG_INTER); 3921 %} 3922 3923 operand eADXRegL( eRegL reg ) %{ 3924 constraint(ALLOC_IN_RC(eadx_reg)); 3925 match(reg); 3926 3927 format %{ "EDX:EAX" %} 3928 interface(REG_INTER); 3929 %} 3930 3931 operand eBCXRegL( eRegL reg ) %{ 3932 constraint(ALLOC_IN_RC(ebcx_reg)); 3933 match(reg); 3934 3935 format %{ "EBX:ECX" %} 3936 interface(REG_INTER); 3937 %} 3938 3939 // Special case for integer high multiply 3940 operand eADXRegL_low_only() %{ 3941 constraint(ALLOC_IN_RC(eadx_reg)); 3942 match(RegL); 3943 3944 format %{ "EAX" %} 3945 interface(REG_INTER); 3946 %} 3947 3948 // Flags register, used as output of compare instructions 3949 operand eFlagsReg() %{ 3950 constraint(ALLOC_IN_RC(int_flags)); 3951 match(RegFlags); 3952 3953 format %{ "EFLAGS" %} 3954 interface(REG_INTER); 3955 %} 3956 3957 // Flags register, used as output of FLOATING POINT compare instructions 3958 operand eFlagsRegU() %{ 3959 constraint(ALLOC_IN_RC(int_flags)); 3960 match(RegFlags); 3961 3962 format %{ "EFLAGS_U" %} 3963 interface(REG_INTER); 3964 %} 3965 3966 operand eFlagsRegUCF() %{ 3967 constraint(ALLOC_IN_RC(int_flags)); 3968 match(RegFlags); 3969 predicate(false); 3970 3971 format %{ "EFLAGS_U_CF" %} 3972 interface(REG_INTER); 3973 %} 3974 3975 // Condition Code Register used by long compare 3976 operand flagsReg_long_LTGE() %{ 3977 constraint(ALLOC_IN_RC(int_flags)); 3978 match(RegFlags); 3979 format %{ "FLAGS_LTGE" %} 3980 interface(REG_INTER); 3981 %} 3982 operand flagsReg_long_EQNE() %{ 3983 constraint(ALLOC_IN_RC(int_flags)); 3984 match(RegFlags); 3985 format %{ "FLAGS_EQNE" %} 3986 interface(REG_INTER); 3987 %} 3988 operand flagsReg_long_LEGT() %{ 3989 constraint(ALLOC_IN_RC(int_flags)); 3990 match(RegFlags); 3991 format %{ "FLAGS_LEGT" %} 3992 interface(REG_INTER); 3993 %} 3994 3995 // Float register operands 3996 operand regDPR() %{ 3997 predicate( UseSSE < 2 ); 
3998 constraint(ALLOC_IN_RC(fp_dbl_reg)); 3999 match(RegD); 4000 match(regDPR1); 4001 match(regDPR2); 4002 format %{ %} 4003 interface(REG_INTER); 4004 %} 4005 4006 operand regDPR1(regDPR reg) %{ 4007 predicate( UseSSE < 2 ); 4008 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4009 match(reg); 4010 format %{ "FPR1" %} 4011 interface(REG_INTER); 4012 %} 4013 4014 operand regDPR2(regDPR reg) %{ 4015 predicate( UseSSE < 2 ); 4016 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4017 match(reg); 4018 format %{ "FPR2" %} 4019 interface(REG_INTER); 4020 %} 4021 4022 operand regnotDPR1(regDPR reg) %{ 4023 predicate( UseSSE < 2 ); 4024 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4025 match(reg); 4026 format %{ %} 4027 interface(REG_INTER); 4028 %} 4029 4030 // Float register operands 4031 operand regFPR() %{ 4032 predicate( UseSSE < 2 ); 4033 constraint(ALLOC_IN_RC(fp_flt_reg)); 4034 match(RegF); 4035 match(regFPR1); 4036 format %{ %} 4037 interface(REG_INTER); 4038 %} 4039 4040 // Float register operands 4041 operand regFPR1(regFPR reg) %{ 4042 predicate( UseSSE < 2 ); 4043 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4044 match(reg); 4045 format %{ "FPR1" %} 4046 interface(REG_INTER); 4047 %} 4048 4049 // XMM Float register operands 4050 operand regF() %{ 4051 predicate( UseSSE>=1 ); 4052 constraint(ALLOC_IN_RC(float_reg_legacy)); 4053 match(RegF); 4054 format %{ %} 4055 interface(REG_INTER); 4056 %} 4057 4058 // XMM Double register operands 4059 operand regD() %{ 4060 predicate( UseSSE>=2 ); 4061 constraint(ALLOC_IN_RC(double_reg_legacy)); 4062 match(RegD); 4063 format %{ %} 4064 interface(REG_INTER); 4065 %} 4066 4067 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM) 4068 // runtime code generation via reg_class_dynamic. 
4069 operand vecS() %{ 4070 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 4071 match(VecS); 4072 4073 format %{ %} 4074 interface(REG_INTER); 4075 %} 4076 4077 operand vecD() %{ 4078 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 4079 match(VecD); 4080 4081 format %{ %} 4082 interface(REG_INTER); 4083 %} 4084 4085 operand vecX() %{ 4086 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 4087 match(VecX); 4088 4089 format %{ %} 4090 interface(REG_INTER); 4091 %} 4092 4093 operand vecY() %{ 4094 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 4095 match(VecY); 4096 4097 format %{ %} 4098 interface(REG_INTER); 4099 %} 4100 4101 //----------Memory Operands---------------------------------------------------- 4102 // Direct Memory Operand 4103 operand direct(immP addr) %{ 4104 match(addr); 4105 4106 format %{ "[$addr]" %} 4107 interface(MEMORY_INTER) %{ 4108 base(0xFFFFFFFF); 4109 index(0x4); 4110 scale(0x0); 4111 disp($addr); 4112 %} 4113 %} 4114 4115 // Indirect Memory Operand 4116 operand indirect(eRegP reg) %{ 4117 constraint(ALLOC_IN_RC(int_reg)); 4118 match(reg); 4119 4120 format %{ "[$reg]" %} 4121 interface(MEMORY_INTER) %{ 4122 base($reg); 4123 index(0x4); 4124 scale(0x0); 4125 disp(0x0); 4126 %} 4127 %} 4128 4129 // Indirect Memory Plus Short Offset Operand 4130 operand indOffset8(eRegP reg, immI8 off) %{ 4131 match(AddP reg off); 4132 4133 format %{ "[$reg + $off]" %} 4134 interface(MEMORY_INTER) %{ 4135 base($reg); 4136 index(0x4); 4137 scale(0x0); 4138 disp($off); 4139 %} 4140 %} 4141 4142 // Indirect Memory Plus Long Offset Operand 4143 operand indOffset32(eRegP reg, immI off) %{ 4144 match(AddP reg off); 4145 4146 format %{ "[$reg + $off]" %} 4147 interface(MEMORY_INTER) %{ 4148 base($reg); 4149 index(0x4); 4150 scale(0x0); 4151 disp($off); 4152 %} 4153 %} 4154 4155 // Indirect Memory Plus Long Offset Operand 4156 operand indOffset32X(rRegI reg, immP off) %{ 4157 match(AddP off reg); 4158 4159 format %{ "[$reg + $off]" %} 4160 interface(MEMORY_INTER) %{ 4161 
base($reg); 4162 index(0x4); 4163 scale(0x0); 4164 disp($off); 4165 %} 4166 %} 4167 4168 // Indirect Memory Plus Index Register Plus Offset Operand 4169 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4170 match(AddP (AddP reg ireg) off); 4171 4172 op_cost(10); 4173 format %{"[$reg + $off + $ireg]" %} 4174 interface(MEMORY_INTER) %{ 4175 base($reg); 4176 index($ireg); 4177 scale(0x0); 4178 disp($off); 4179 %} 4180 %} 4181 4182 // Indirect Memory Plus Index Register Plus Offset Operand 4183 operand indIndex(eRegP reg, rRegI ireg) %{ 4184 match(AddP reg ireg); 4185 4186 op_cost(10); 4187 format %{"[$reg + $ireg]" %} 4188 interface(MEMORY_INTER) %{ 4189 base($reg); 4190 index($ireg); 4191 scale(0x0); 4192 disp(0x0); 4193 %} 4194 %} 4195 4196 // // ------------------------------------------------------------------------- 4197 // // 486 architecture doesn't support "scale * index + offset" with out a base 4198 // // ------------------------------------------------------------------------- 4199 // // Scaled Memory Operands 4200 // // Indirect Memory Times Scale Plus Offset Operand 4201 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4202 // match(AddP off (LShiftI ireg scale)); 4203 // 4204 // op_cost(10); 4205 // format %{"[$off + $ireg << $scale]" %} 4206 // interface(MEMORY_INTER) %{ 4207 // base(0x4); 4208 // index($ireg); 4209 // scale($scale); 4210 // disp($off); 4211 // %} 4212 // %} 4213 4214 // Indirect Memory Times Scale Plus Index Register 4215 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4216 match(AddP reg (LShiftI ireg scale)); 4217 4218 op_cost(10); 4219 format %{"[$reg + $ireg << $scale]" %} 4220 interface(MEMORY_INTER) %{ 4221 base($reg); 4222 index($ireg); 4223 scale($scale); 4224 disp(0x0); 4225 %} 4226 %} 4227 4228 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4229 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4230 match(AddP (AddP reg (LShiftI ireg 
scale)) off); 4231 4232 op_cost(10); 4233 format %{"[$reg + $off + $ireg << $scale]" %} 4234 interface(MEMORY_INTER) %{ 4235 base($reg); 4236 index($ireg); 4237 scale($scale); 4238 disp($off); 4239 %} 4240 %} 4241 4242 //----------Load Long Memory Operands------------------------------------------ 4243 // The load-long idiom will use it's address expression again after loading 4244 // the first word of the long. If the load-long destination overlaps with 4245 // registers used in the addressing expression, the 2nd half will be loaded 4246 // from a clobbered address. Fix this by requiring that load-long use 4247 // address registers that do not overlap with the load-long target. 4248 4249 // load-long support 4250 operand load_long_RegP() %{ 4251 constraint(ALLOC_IN_RC(esi_reg)); 4252 match(RegP); 4253 match(eSIRegP); 4254 op_cost(100); 4255 format %{ %} 4256 interface(REG_INTER); 4257 %} 4258 4259 // Indirect Memory Operand Long 4260 operand load_long_indirect(load_long_RegP reg) %{ 4261 constraint(ALLOC_IN_RC(esi_reg)); 4262 match(reg); 4263 4264 format %{ "[$reg]" %} 4265 interface(MEMORY_INTER) %{ 4266 base($reg); 4267 index(0x4); 4268 scale(0x0); 4269 disp(0x0); 4270 %} 4271 %} 4272 4273 // Indirect Memory Plus Long Offset Operand 4274 operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4275 match(AddP reg off); 4276 4277 format %{ "[$reg + $off]" %} 4278 interface(MEMORY_INTER) %{ 4279 base($reg); 4280 index(0x4); 4281 scale(0x0); 4282 disp($off); 4283 %} 4284 %} 4285 4286 opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4287 4288 4289 //----------Special Memory Operands-------------------------------------------- 4290 // Stack Slot Operand - This operand is used for loading and storing temporary 4291 // values on the stack where a match requires a value to 4292 // flow through memory. 
4293 operand stackSlotP(sRegP reg) %{ 4294 constraint(ALLOC_IN_RC(stack_slots)); 4295 // No match rule because this operand is only generated in matching 4296 format %{ "[$reg]" %} 4297 interface(MEMORY_INTER) %{ 4298 base(0x4); // ESP 4299 index(0x4); // No Index 4300 scale(0x0); // No Scale 4301 disp($reg); // Stack Offset 4302 %} 4303 %} 4304 4305 operand stackSlotI(sRegI reg) %{ 4306 constraint(ALLOC_IN_RC(stack_slots)); 4307 // No match rule because this operand is only generated in matching 4308 format %{ "[$reg]" %} 4309 interface(MEMORY_INTER) %{ 4310 base(0x4); // ESP 4311 index(0x4); // No Index 4312 scale(0x0); // No Scale 4313 disp($reg); // Stack Offset 4314 %} 4315 %} 4316 4317 operand stackSlotF(sRegF reg) %{ 4318 constraint(ALLOC_IN_RC(stack_slots)); 4319 // No match rule because this operand is only generated in matching 4320 format %{ "[$reg]" %} 4321 interface(MEMORY_INTER) %{ 4322 base(0x4); // ESP 4323 index(0x4); // No Index 4324 scale(0x0); // No Scale 4325 disp($reg); // Stack Offset 4326 %} 4327 %} 4328 4329 operand stackSlotD(sRegD reg) %{ 4330 constraint(ALLOC_IN_RC(stack_slots)); 4331 // No match rule because this operand is only generated in matching 4332 format %{ "[$reg]" %} 4333 interface(MEMORY_INTER) %{ 4334 base(0x4); // ESP 4335 index(0x4); // No Index 4336 scale(0x0); // No Scale 4337 disp($reg); // Stack Offset 4338 %} 4339 %} 4340 4341 operand stackSlotL(sRegL reg) %{ 4342 constraint(ALLOC_IN_RC(stack_slots)); 4343 // No match rule because this operand is only generated in matching 4344 format %{ "[$reg]" %} 4345 interface(MEMORY_INTER) %{ 4346 base(0x4); // ESP 4347 index(0x4); // No Index 4348 scale(0x0); // No Scale 4349 disp($reg); // Stack Offset 4350 %} 4351 %} 4352 4353 //----------Memory Operands - Win95 Implicit Null Variants---------------- 4354 // Indirect Memory Operand 4355 operand indirect_win95_safe(eRegP_no_EBP reg) 4356 %{ 4357 constraint(ALLOC_IN_RC(int_reg)); 4358 match(reg); 4359 4360 op_cost(100); 4361 
format %{ "[$reg]" %} 4362 interface(MEMORY_INTER) %{ 4363 base($reg); 4364 index(0x4); 4365 scale(0x0); 4366 disp(0x0); 4367 %} 4368 %} 4369 4370 // Indirect Memory Plus Short Offset Operand 4371 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off) 4372 %{ 4373 match(AddP reg off); 4374 4375 op_cost(100); 4376 format %{ "[$reg + $off]" %} 4377 interface(MEMORY_INTER) %{ 4378 base($reg); 4379 index(0x4); 4380 scale(0x0); 4381 disp($off); 4382 %} 4383 %} 4384 4385 // Indirect Memory Plus Long Offset Operand 4386 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off) 4387 %{ 4388 match(AddP reg off); 4389 4390 op_cost(100); 4391 format %{ "[$reg + $off]" %} 4392 interface(MEMORY_INTER) %{ 4393 base($reg); 4394 index(0x4); 4395 scale(0x0); 4396 disp($off); 4397 %} 4398 %} 4399 4400 // Indirect Memory Plus Index Register Plus Offset Operand 4401 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off) 4402 %{ 4403 match(AddP (AddP reg ireg) off); 4404 4405 op_cost(100); 4406 format %{"[$reg + $off + $ireg]" %} 4407 interface(MEMORY_INTER) %{ 4408 base($reg); 4409 index($ireg); 4410 scale(0x0); 4411 disp($off); 4412 %} 4413 %} 4414 4415 // Indirect Memory Times Scale Plus Index Register 4416 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale) 4417 %{ 4418 match(AddP reg (LShiftI ireg scale)); 4419 4420 op_cost(100); 4421 format %{"[$reg + $ireg << $scale]" %} 4422 interface(MEMORY_INTER) %{ 4423 base($reg); 4424 index($ireg); 4425 scale($scale); 4426 disp(0x0); 4427 %} 4428 %} 4429 4430 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4431 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale) 4432 %{ 4433 match(AddP (AddP reg (LShiftI ireg scale)) off); 4434 4435 op_cost(100); 4436 format %{"[$reg + $off + $ireg << $scale]" %} 4437 interface(MEMORY_INTER) %{ 4438 base($reg); 4439 index($ireg); 4440 scale($scale); 4441 disp($off); 4442 %} 4443 %} 4444 4445 
//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed compares).  The hex values are the x86
// condition-code field values, with their mnemonic suffixes ("e", "l", ...)
// as used by the Jcc/SETcc/CMOVcc families.
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case.
// Restricted to the strict/relational tests (lt, ge, le, gt); eq/ne need
// extra handling and use cmpOpUCF2 below.
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons (eq/ne only) that can be fixed up with extra
// conditional jumps.
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move (FCMOVcc opcode bytes rather
// than condition-code nybbles; overflow tests are excluded by the predicate).
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares.  Note the relational encodings are
// commuted (less maps to "g", greater to "l", etc.), matching a compare with
// swapped operand order.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are of variable size (x86)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // Instruction granularity is 1 byte
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention:  ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
// Or:   _mem if it requires the big decoder and a memory unit.
4632 4633 // Integer ALU reg operation 4634 pipe_class ialu_reg(rRegI dst) %{ 4635 single_instruction; 4636 dst : S4(write); 4637 dst : S3(read); 4638 DECODE : S0; // any decoder 4639 ALU : S3; // any alu 4640 %} 4641 4642 // Long ALU reg operation 4643 pipe_class ialu_reg_long(eRegL dst) %{ 4644 instruction_count(2); 4645 dst : S4(write); 4646 dst : S3(read); 4647 DECODE : S0(2); // any 2 decoders 4648 ALU : S3(2); // both alus 4649 %} 4650 4651 // Integer ALU reg operation using big decoder 4652 pipe_class ialu_reg_fat(rRegI dst) %{ 4653 single_instruction; 4654 dst : S4(write); 4655 dst : S3(read); 4656 D0 : S0; // big decoder only 4657 ALU : S3; // any alu 4658 %} 4659 4660 // Long ALU reg operation using big decoder 4661 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4662 instruction_count(2); 4663 dst : S4(write); 4664 dst : S3(read); 4665 D0 : S0(2); // big decoder only; twice 4666 ALU : S3(2); // any 2 alus 4667 %} 4668 4669 // Integer ALU reg-reg operation 4670 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4671 single_instruction; 4672 dst : S4(write); 4673 src : S3(read); 4674 DECODE : S0; // any decoder 4675 ALU : S3; // any alu 4676 %} 4677 4678 // Long ALU reg-reg operation 4679 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4680 instruction_count(2); 4681 dst : S4(write); 4682 src : S3(read); 4683 DECODE : S0(2); // any 2 decoders 4684 ALU : S3(2); // both alus 4685 %} 4686 4687 // Integer ALU reg-reg operation 4688 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4689 single_instruction; 4690 dst : S4(write); 4691 src : S3(read); 4692 D0 : S0; // big decoder only 4693 ALU : S3; // any alu 4694 %} 4695 4696 // Long ALU reg-reg operation 4697 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4698 instruction_count(2); 4699 dst : S4(write); 4700 src : S3(read); 4701 D0 : S0(2); // big decoder only; twice 4702 ALU : S3(2); // both alus 4703 %} 4704 4705 // Integer ALU reg-mem operation 4706 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4707 single_instruction; 4708 dst : S5(write); 4709 mem : S3(read); 4710 D0 : S0; // big decoder only 4711 ALU : S4; // any alu 4712 MEM : S3; // any mem 4713 %} 4714 4715 // Long ALU reg-mem operation 4716 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4717 instruction_count(2); 4718 dst : S5(write); 4719 mem : S3(read); 4720 D0 : S0(2); // big decoder only; twice 4721 ALU : S4(2); // any 2 alus 4722 MEM : S3(2); // both mems 4723 %} 4724 4725 // Integer mem operation (prefetch) 4726 pipe_class ialu_mem(memory mem) 4727 %{ 4728 single_instruction; 4729 mem : S3(read); 4730 D0 : S0; // big decoder only 4731 MEM : S3; // any mem 4732 %} 4733 4734 // Integer Store to Memory 4735 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4736 single_instruction; 4737 mem : S3(read); 4738 src : S5(read); 4739 D0 : S0; // big decoder only 4740 ALU : S4; // any alu 4741 MEM : S3; 4742 %} 4743 4744 // Long Store to Memory 4745 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4746 instruction_count(2); 4747 mem : S3(read); 4748 src : S5(read); 4749 D0 : S0(2); // big decoder only; twice 4750 ALU : S4(2); // any 2 alus 4751 MEM : S3(2); // Both mems 4752 %} 4753 4754 // Integer Store to Memory 4755 pipe_class ialu_mem_imm(memory mem) %{ 4756 single_instruction; 4757 mem : S3(read); 4758 D0 : S0; // big decoder only 4759 ALU : S4; // any alu 4760 MEM : S3; 4761 %} 4762 4763 // Integer ALU0 reg-reg operation 4764 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4765 single_instruction; 4766 dst : S4(write); 4767 src : S3(read); 4768 D0 : S0; // Big decoder only 4769 ALU0 : S3; // only alu0 4770 %} 4771 4772 // Integer ALU0 reg-mem operation 4773 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4774 single_instruction; 4775 dst : S5(write); 4776 mem : S3(read); 4777 D0 : S0; // big decoder only 4778 ALU0 : S4; // ALU0 only 4779 MEM : S3; // any mem 4780 %} 4781 4782 // Integer ALU reg-reg operation 4783 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders (comment previously said "any 3" but the stage is S0(4))
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR   : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr    : S1(read);
    BR    : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided. These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.
//               The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    // Logical shift: the reversed 16-bit value ends up zero-extended in the low half.
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    // Arithmetic shift: the reversed 16-bit value ends up sign-extended in the low half.
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback for CPUs without LZCNT: 31 - BSR(src), with src == 0 mapped to 32.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    // src == 0: force index -1 so the result below becomes 31 - (-1) = 32.
    __ movl(Rdst, -1);
    __ bind(skip);
    // NEG + ADD 31 computes 31 - bit-index == number of leading zeros.
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    // CF set here means the high word was all zeros (LZCNT semantics),
    // so fall through and count the low word plus 32.
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback for CPUs without LZCNT: BSR over the high word, then the low word.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    // Bit found in the high word: bias its index by 32 to get the 64-bit index.
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    // src == 0: force index -1 so the result below becomes 63 - (-1) = 64.
    __ movl(Rdst, -1);
    __ bind(not_zero);
    // NEG + ADD 63 computes 63 - bit-index == number of leading zeros.
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback for CPUs without TZCNT: BSF gives the index of the lowest set bit,
// which equals the trailing-zero count; src == 0 is mapped to 32.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    // src == 0: BSF leaves ZF set and Rdst undefined; return 32 explicitly.
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    // CF set here means the low word was all zeros (TZCNT semantics),
    // so fall through and count the high word plus 32.
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback for CPUs without TZCNT: BSF over the low word, then the high word.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    // Both words zero: 32 (for msw) + 32 (bias below) = 64.
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    // Bit found in the high word (or forced 32 above): bias by 32.
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    // 64-bit popcount on 32-bit: count each word separately and sum.
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Build explicit addresses for the low word (disp) and high word (disp + 4).
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    // Zero-extended value: high word is always zero.
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 bits of the mask can matter after a zero-extended byte load.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    // (x << 24) >> 24 of a short load is just a sign-extending byte load.
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    // (x << 24) >> 24 of an unsigned-short load is a sign-extending byte load.
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    // The 0xFF mask reduces the 16-bit load to a plain zero-extended byte load.
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 bits of the mask can matter after a zero-extended short load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // High word becomes all copies of the sign bit.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    // The 0xFF mask reduces the int load to a zero-extended byte load.
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    // The 0xFFFF mask reduces the int load to a zero-extended short load.
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    // A 31-bit mask clears the sign bit, so the high word is always zero.
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads: low word at disp, high word at disp + 4 (little-endian).
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    // Single 64-bit XMM load/store pair keeps the access atomic.
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    // Atomic 64-bit XMM load, then split into the two GP halves.
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe(
              ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 path: push from memory onto the FPU stack, then pop into dst)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD  $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// As loadD, but chosen when clearing the upper half of the XMM register
// is not wanted (UseXmmLoadAndClearUpper is off).
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS  $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 path)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address -- one variant per addressing-mode operand,
// all emitting the same LEA opcode.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV    $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero -- XOR reg,reg is shorter than MOV reg,0 but
// clobbers the flags, hence the KILL cr effect.
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR    $dst,$dst" %}
  opcode(0x33);               /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV    $dst,$src" %}
  opcode(0xB8);               /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant: two 32-bit immediate moves for the lo/hi halves.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV    $dst.lo,$src.lo\n\t"
            "MOV    $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR    $dst.lo,$dst.lo\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS  $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD  $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV    $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load long from stack slot: two 32-bit loads for the lo/hi halves.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV    $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8   $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short (0x66 operand-size prefix selects the 16-bit form)
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16  $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long (non-atomic two-instruction form; the atomic variants below
// handle the require_atomic_access() case)
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV    $mem,$src.lo\n\t"
            "MOV    $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV    $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD   $src\n\t"
            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: bounce the long through a stack slot and an XMM temp;
// the 64-bit MOVSD store is atomic.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD  $tmp,$src\n\t"
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant taking the long straight from a GPR pair: pack lo/hi
// halves into one XMM register and store with a single atomic MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD   $tmp,$src.lo\n\t"
            "MOVD   $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV    $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16  $mem,$src" %}
  opcode(0xC7);               /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV    $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8   $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87; src must be top-of-stack, regDPR1)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D  $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D  $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating point)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD  $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS  $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float (x87; src must be top-of-stack, regFPR1)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S  $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S  $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S  $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV    $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV    $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV    $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV    $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P  $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV    $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move -- branch-based emulation for CPUs without CMOV
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV    $dst,$src\n"
      "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV    $dst,$src\n"
      "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note:  a CMoveP is generated for  stubs and native wrappers
//        regardless of whether we are on a P6, so we
//        emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop   skip\n\t"
          "MOV    $dst,$src\t# pointer\n"
      "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOV    $dst,$src\t# double\n"
      "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop    skip\n\t"
            "MOV    $dst,$src\t# float\n"
      "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSS  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSS  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: one CMOV per 32-bit half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD    $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD    $dst,$src" %}
  opcode(0x81, 0x00);         /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC    $dst" %}
  opcode(0x40);               /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// LEA computes the sum without touching the flags, so no KILL cr here.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA    $dst,[$src0 + $src1]" %}
  opcode(0x8D);               /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D);               /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC    $dst" %}
  opcode(0x48);               /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD    $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD    $dst,$src" %}
  opcode(0x81,0x00);          /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD    $dst,$src"
%} 7099 opcode(0x03); 7100 ins_encode( OpcP, RegMem( dst, src) ); 7101 ins_pipe( ialu_reg_mem ); 7102 %} 7103 7104 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7105 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7106 effect(KILL cr); 7107 7108 ins_cost(150); 7109 format %{ "ADD $dst,$src" %} 7110 opcode(0x01); /* Opcode 01 /r */ 7111 ins_encode( OpcP, RegMem( src, dst ) ); 7112 ins_pipe( ialu_mem_reg ); 7113 %} 7114 7115 // Add Memory with Immediate 7116 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7117 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7118 effect(KILL cr); 7119 7120 ins_cost(125); 7121 format %{ "ADD $dst,$src" %} 7122 opcode(0x81); /* Opcode 81 /0 id */ 7123 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7124 ins_pipe( ialu_mem_imm ); 7125 %} 7126 7127 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7128 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7129 effect(KILL cr); 7130 7131 ins_cost(125); 7132 format %{ "INC $dst" %} 7133 opcode(0xFF); /* Opcode FF /0 */ 7134 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7135 ins_pipe( ialu_mem_imm ); 7136 %} 7137 7138 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7139 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7140 effect(KILL cr); 7141 7142 ins_cost(125); 7143 format %{ "DEC $dst" %} 7144 opcode(0xFF); /* Opcode FF /1 */ 7145 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7146 ins_pipe( ialu_mem_imm ); 7147 %} 7148 7149 7150 instruct checkCastPP( eRegP dst ) %{ 7151 match(Set dst (CheckCastPP dst)); 7152 7153 size(0); 7154 format %{ "#checkcastPP of $dst" %} 7155 ins_encode( /*empty encoding*/ ); 7156 ins_pipe( empty ); 7157 %} 7158 7159 instruct castPP( eRegP dst ) %{ 7160 match(Set dst (CastPP dst)); 7161 format %{ "#castPP of $dst" %} 7162 ins_encode( /*empty encoding*/ ); 7163 ins_pipe( empty ); 7164 %} 7165 7166 instruct castII( rRegI dst ) %{ 7167 match(Set dst (CastII dst)); 7168 format %{ "#castII of $dst" %} 
7169 ins_encode( /*empty encoding*/ ); 7170 ins_cost(0); 7171 ins_pipe( empty ); 7172 %} 7173 7174 7175 // Load-locked - same as a regular pointer load when used with compare-swap 7176 instruct loadPLocked(eRegP dst, memory mem) %{ 7177 match(Set dst (LoadPLocked mem)); 7178 7179 ins_cost(125); 7180 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7181 opcode(0x8B); 7182 ins_encode( OpcP, RegMem(dst,mem)); 7183 ins_pipe( ialu_reg_mem ); 7184 %} 7185 7186 // Conditional-store of the updated heap-top. 7187 // Used during allocation of the shared heap. 7188 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7189 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7190 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7191 // EAX is killed if there is contention, but then it's also unused. 7192 // In the common case of no contention, EAX holds the new oop address. 7193 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7194 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7195 ins_pipe( pipe_cmpxchg ); 7196 %} 7197 7198 // Conditional-store of an int value. 7199 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 7200 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ 7201 match(Set cr (StoreIConditional mem (Binary oldval newval))); 7202 effect(KILL oldval); 7203 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} 7204 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); 7205 ins_pipe( pipe_cmpxchg ); 7206 %} 7207 7208 // Conditional-store of a long value. 7209 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 
7210 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7211 match(Set cr (StoreLConditional mem (Binary oldval newval))); 7212 effect(KILL oldval); 7213 format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t" 7214 "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t" 7215 "XCHG EBX,ECX" 7216 %} 7217 ins_encode %{ 7218 // Note: we need to swap rbx, and rcx before and after the 7219 // cmpxchg8 instruction because the instruction uses 7220 // rcx as the high order word of the new value to store but 7221 // our register encoding uses rbx. 7222 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7223 if( os::is_MP() ) 7224 __ lock(); 7225 __ cmpxchg8($mem$$Address); 7226 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7227 %} 7228 ins_pipe( pipe_cmpxchg ); 7229 %} 7230 7231 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7232 7233 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7234 predicate(VM_Version::supports_cx8()); 7235 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7236 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); 7237 effect(KILL cr, KILL oldval); 7238 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7239 "MOV $res,0\n\t" 7240 "JNE,s fail\n\t" 7241 "MOV $res,1\n" 7242 "fail:" %} 7243 ins_encode( enc_cmpxchg8(mem_ptr), 7244 enc_flags_ne_to_boolean(res) ); 7245 ins_pipe( pipe_cmpxchg ); 7246 %} 7247 7248 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7249 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7250 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); 7251 effect(KILL cr, KILL oldval); 7252 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7253 "MOV 
$res,0\n\t" 7254 "JNE,s fail\n\t" 7255 "MOV $res,1\n" 7256 "fail:" %} 7257 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7258 ins_pipe( pipe_cmpxchg ); 7259 %} 7260 7261 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7262 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7263 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); 7264 effect(KILL cr, KILL oldval); 7265 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7266 "MOV $res,0\n\t" 7267 "JNE,s fail\n\t" 7268 "MOV $res,1\n" 7269 "fail:" %} 7270 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7271 ins_pipe( pipe_cmpxchg ); 7272 %} 7273 7274 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7275 predicate(VM_Version::supports_cx8()); 7276 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval))); 7277 effect(KILL cr); 7278 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7279 ins_encode( enc_cmpxchg8(mem_ptr) ); 7280 ins_pipe( pipe_cmpxchg ); 7281 %} 7282 7283 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7284 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); 7285 effect(KILL cr); 7286 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7287 ins_encode( enc_cmpxchg(mem_ptr) ); 7288 ins_pipe( pipe_cmpxchg ); 7289 %} 7290 7291 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7292 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval))); 7293 effect(KILL cr); 7294 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7295 ins_encode( enc_cmpxchg(mem_ptr) ); 7296 ins_pipe( pipe_cmpxchg ); 7297 %} 7298 
7299 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7300 predicate(n->as_LoadStore()->result_not_used()); 7301 match(Set dummy (GetAndAddI mem add)); 7302 effect(KILL cr); 7303 format %{ "ADDL [$mem],$add" %} 7304 ins_encode %{ 7305 if (os::is_MP()) { __ lock(); } 7306 __ addl($mem$$Address, $add$$constant); 7307 %} 7308 ins_pipe( pipe_cmpxchg ); 7309 %} 7310 7311 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7312 match(Set newval (GetAndAddI mem newval)); 7313 effect(KILL cr); 7314 format %{ "XADDL [$mem],$newval" %} 7315 ins_encode %{ 7316 if (os::is_MP()) { __ lock(); } 7317 __ xaddl($mem$$Address, $newval$$Register); 7318 %} 7319 ins_pipe( pipe_cmpxchg ); 7320 %} 7321 7322 instruct xchgI( memory mem, rRegI newval) %{ 7323 match(Set newval (GetAndSetI mem newval)); 7324 format %{ "XCHGL $newval,[$mem]" %} 7325 ins_encode %{ 7326 __ xchgl($newval$$Register, $mem$$Address); 7327 %} 7328 ins_pipe( pipe_cmpxchg ); 7329 %} 7330 7331 instruct xchgP( memory mem, pRegP newval) %{ 7332 match(Set newval (GetAndSetP mem newval)); 7333 format %{ "XCHGL $newval,[$mem]" %} 7334 ins_encode %{ 7335 __ xchgl($newval$$Register, $mem$$Address); 7336 %} 7337 ins_pipe( pipe_cmpxchg ); 7338 %} 7339 7340 //----------Subtraction Instructions------------------------------------------- 7341 7342 // Integer Subtraction Instructions 7343 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7344 match(Set dst (SubI dst src)); 7345 effect(KILL cr); 7346 7347 size(2); 7348 format %{ "SUB $dst,$src" %} 7349 opcode(0x2B); 7350 ins_encode( OpcP, RegReg( dst, src) ); 7351 ins_pipe( ialu_reg_reg ); 7352 %} 7353 7354 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7355 match(Set dst (SubI dst src)); 7356 effect(KILL cr); 7357 7358 format %{ "SUB $dst,$src" %} 7359 opcode(0x81,0x05); /* Opcode 81 /5 */ 7360 // ins_encode( RegImm( dst, src) ); 7361 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7362 ins_pipe( ialu_reg ); 7363 %} 7364 
7365 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7366 match(Set dst (SubI dst (LoadI src))); 7367 effect(KILL cr); 7368 7369 ins_cost(125); 7370 format %{ "SUB $dst,$src" %} 7371 opcode(0x2B); 7372 ins_encode( OpcP, RegMem( dst, src) ); 7373 ins_pipe( ialu_reg_mem ); 7374 %} 7375 7376 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7377 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7378 effect(KILL cr); 7379 7380 ins_cost(150); 7381 format %{ "SUB $dst,$src" %} 7382 opcode(0x29); /* Opcode 29 /r */ 7383 ins_encode( OpcP, RegMem( src, dst ) ); 7384 ins_pipe( ialu_mem_reg ); 7385 %} 7386 7387 // Subtract from a pointer 7388 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{ 7389 match(Set dst (AddP dst (SubI zero src))); 7390 effect(KILL cr); 7391 7392 size(2); 7393 format %{ "SUB $dst,$src" %} 7394 opcode(0x2B); 7395 ins_encode( OpcP, RegReg( dst, src) ); 7396 ins_pipe( ialu_reg_reg ); 7397 %} 7398 7399 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{ 7400 match(Set dst (SubI zero dst)); 7401 effect(KILL cr); 7402 7403 size(2); 7404 format %{ "NEG $dst" %} 7405 opcode(0xF7,0x03); // Opcode F7 /3 7406 ins_encode( OpcP, RegOpc( dst ) ); 7407 ins_pipe( ialu_reg ); 7408 %} 7409 7410 //----------Multiplication/Division Instructions------------------------------- 7411 // Integer Multiplication Instructions 7412 // Multiply Register 7413 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7414 match(Set dst (MulI dst src)); 7415 effect(KILL cr); 7416 7417 size(3); 7418 ins_cost(300); 7419 format %{ "IMUL $dst,$src" %} 7420 opcode(0xAF, 0x0F); 7421 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7422 ins_pipe( ialu_reg_reg_alu0 ); 7423 %} 7424 7425 // Multiply 32-bit Immediate 7426 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7427 match(Set dst (MulI src imm)); 7428 effect(KILL cr); 7429 7430 ins_cost(300); 7431 format %{ "IMUL $dst,$src,$imm" %} 7432 opcode(0x69); /* 69 /r id 
*/ 7433 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7434 ins_pipe( ialu_reg_reg_alu0 ); 7435 %} 7436 7437 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7438 match(Set dst src); 7439 effect(KILL cr); 7440 7441 // Note that this is artificially increased to make it more expensive than loadConL 7442 ins_cost(250); 7443 format %{ "MOV EAX,$src\t// low word only" %} 7444 opcode(0xB8); 7445 ins_encode( LdImmL_Lo(dst, src) ); 7446 ins_pipe( ialu_reg_fat ); 7447 %} 7448 7449 // Multiply by 32-bit Immediate, taking the shifted high order results 7450 // (special case for shift by 32) 7451 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7452 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7453 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7454 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7455 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7456 effect(USE src1, KILL cr); 7457 7458 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7459 ins_cost(0*100 + 1*400 - 150); 7460 format %{ "IMUL EDX:EAX,$src1" %} 7461 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7462 ins_pipe( pipe_slow ); 7463 %} 7464 7465 // Multiply by 32-bit Immediate, taking the shifted high order results 7466 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7467 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7468 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7469 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7470 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7471 effect(USE src1, KILL cr); 7472 7473 // Note 
that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7474 ins_cost(1*100 + 1*400 - 150); 7475 format %{ "IMUL EDX:EAX,$src1\n\t" 7476 "SAR EDX,$cnt-32" %} 7477 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7478 ins_pipe( pipe_slow ); 7479 %} 7480 7481 // Multiply Memory 32-bit Immediate 7482 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7483 match(Set dst (MulI (LoadI src) imm)); 7484 effect(KILL cr); 7485 7486 ins_cost(300); 7487 format %{ "IMUL $dst,$src,$imm" %} 7488 opcode(0x69); /* 69 /r id */ 7489 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 7490 ins_pipe( ialu_reg_mem_alu0 ); 7491 %} 7492 7493 // Multiply Memory 7494 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7495 match(Set dst (MulI dst (LoadI src))); 7496 effect(KILL cr); 7497 7498 ins_cost(350); 7499 format %{ "IMUL $dst,$src" %} 7500 opcode(0xAF, 0x0F); 7501 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 7502 ins_pipe( ialu_reg_mem_alu0 ); 7503 %} 7504 7505 // Multiply Register Int to Long 7506 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7507 // Basic Idea: long = (long)int * (long)int 7508 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7509 effect(DEF dst, USE src, USE src1, KILL flags); 7510 7511 ins_cost(300); 7512 format %{ "IMUL $dst,$src1" %} 7513 7514 ins_encode( long_int_multiply( dst, src1 ) ); 7515 ins_pipe( ialu_reg_reg_alu0 ); 7516 %} 7517 7518 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7519 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7520 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7521 effect(KILL flags); 7522 7523 ins_cost(300); 7524 format %{ "MUL $dst,$src1" %} 7525 7526 ins_encode( long_uint_multiply(dst, src1) ); 7527 ins_pipe( ialu_reg_reg_alu0 ); 7528 %} 7529 7530 // Multiply Register Long 7531 instruct mulL_eReg(eADXRegL dst, 
eRegL src, rRegI tmp, eFlagsReg cr) %{ 7532 match(Set dst (MulL dst src)); 7533 effect(KILL cr, TEMP tmp); 7534 ins_cost(4*100+3*400); 7535 // Basic idea: lo(result) = lo(x_lo * y_lo) 7536 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7537 format %{ "MOV $tmp,$src.lo\n\t" 7538 "IMUL $tmp,EDX\n\t" 7539 "MOV EDX,$src.hi\n\t" 7540 "IMUL EDX,EAX\n\t" 7541 "ADD $tmp,EDX\n\t" 7542 "MUL EDX:EAX,$src.lo\n\t" 7543 "ADD EDX,$tmp" %} 7544 ins_encode( long_multiply( dst, src, tmp ) ); 7545 ins_pipe( pipe_slow ); 7546 %} 7547 7548 // Multiply Register Long where the left operand's high 32 bits are zero 7549 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7550 predicate(is_operand_hi32_zero(n->in(1))); 7551 match(Set dst (MulL dst src)); 7552 effect(KILL cr, TEMP tmp); 7553 ins_cost(2*100+2*400); 7554 // Basic idea: lo(result) = lo(x_lo * y_lo) 7555 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7556 format %{ "MOV $tmp,$src.hi\n\t" 7557 "IMUL $tmp,EAX\n\t" 7558 "MUL EDX:EAX,$src.lo\n\t" 7559 "ADD EDX,$tmp" %} 7560 ins_encode %{ 7561 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7562 __ imull($tmp$$Register, rax); 7563 __ mull($src$$Register); 7564 __ addl(rdx, $tmp$$Register); 7565 %} 7566 ins_pipe( pipe_slow ); 7567 %} 7568 7569 // Multiply Register Long where the right operand's high 32 bits are zero 7570 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7571 predicate(is_operand_hi32_zero(n->in(2))); 7572 match(Set dst (MulL dst src)); 7573 effect(KILL cr, TEMP tmp); 7574 ins_cost(2*100+2*400); 7575 // Basic idea: lo(result) = lo(x_lo * y_lo) 7576 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7577 format %{ "MOV $tmp,$src.lo\n\t" 7578 "IMUL $tmp,EDX\n\t" 7579 "MUL EDX:EAX,$src.lo\n\t" 7580 "ADD EDX,$tmp" %} 7581 ins_encode %{ 7582 __ movl($tmp$$Register, $src$$Register); 7583 __ 
imull($tmp$$Register, rdx); 7584 __ mull($src$$Register); 7585 __ addl(rdx, $tmp$$Register); 7586 %} 7587 ins_pipe( pipe_slow ); 7588 %} 7589 7590 // Multiply Register Long where the left and the right operands' high 32 bits are zero 7591 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7592 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7593 match(Set dst (MulL dst src)); 7594 effect(KILL cr); 7595 ins_cost(1*400); 7596 // Basic idea: lo(result) = lo(x_lo * y_lo) 7597 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7598 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7599 ins_encode %{ 7600 __ mull($src$$Register); 7601 %} 7602 ins_pipe( pipe_slow ); 7603 %} 7604 7605 // Multiply Register Long by small constant 7606 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7607 match(Set dst (MulL dst src)); 7608 effect(KILL cr, TEMP tmp); 7609 ins_cost(2*100+2*400); 7610 size(12); 7611 // Basic idea: lo(result) = lo(src * EAX) 7612 // hi(result) = hi(src * EAX) + lo(src * EDX) 7613 format %{ "IMUL $tmp,EDX,$src\n\t" 7614 "MOV EDX,$src\n\t" 7615 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7616 "ADD EDX,$tmp" %} 7617 ins_encode( long_multiply_con( dst, src, tmp ) ); 7618 ins_pipe( pipe_slow ); 7619 %} 7620 7621 // Integer DIV with Register 7622 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7623 match(Set rax (DivI rax div)); 7624 effect(KILL rdx, KILL cr); 7625 size(26); 7626 ins_cost(30*100+10*100); 7627 format %{ "CMP EAX,0x80000000\n\t" 7628 "JNE,s normal\n\t" 7629 "XOR EDX,EDX\n\t" 7630 "CMP ECX,-1\n\t" 7631 "JE,s done\n" 7632 "normal: CDQ\n\t" 7633 "IDIV $div\n\t" 7634 "done:" %} 7635 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7636 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7637 ins_pipe( ialu_reg_reg_alu0 ); 7638 %} 7639 7640 // Divide Register Long 7641 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, 
eCXRegI cx, eBXRegI bx ) %{ 7642 match(Set dst (DivL src1 src2)); 7643 effect( KILL cr, KILL cx, KILL bx ); 7644 ins_cost(10000); 7645 format %{ "PUSH $src1.hi\n\t" 7646 "PUSH $src1.lo\n\t" 7647 "PUSH $src2.hi\n\t" 7648 "PUSH $src2.lo\n\t" 7649 "CALL SharedRuntime::ldiv\n\t" 7650 "ADD ESP,16" %} 7651 ins_encode( long_div(src1,src2) ); 7652 ins_pipe( pipe_slow ); 7653 %} 7654 7655 // Integer DIVMOD with Register, both quotient and mod results 7656 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7657 match(DivModI rax div); 7658 effect(KILL cr); 7659 size(26); 7660 ins_cost(30*100+10*100); 7661 format %{ "CMP EAX,0x80000000\n\t" 7662 "JNE,s normal\n\t" 7663 "XOR EDX,EDX\n\t" 7664 "CMP ECX,-1\n\t" 7665 "JE,s done\n" 7666 "normal: CDQ\n\t" 7667 "IDIV $div\n\t" 7668 "done:" %} 7669 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7670 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7671 ins_pipe( pipe_slow ); 7672 %} 7673 7674 // Integer MOD with Register 7675 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7676 match(Set rdx (ModI rax div)); 7677 effect(KILL rax, KILL cr); 7678 7679 size(26); 7680 ins_cost(300); 7681 format %{ "CDQ\n\t" 7682 "IDIV $div" %} 7683 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7684 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7685 ins_pipe( ialu_reg_reg_alu0 ); 7686 %} 7687 7688 // Remainder Register Long 7689 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 7690 match(Set dst (ModL src1 src2)); 7691 effect( KILL cr, KILL cx, KILL bx ); 7692 ins_cost(10000); 7693 format %{ "PUSH $src1.hi\n\t" 7694 "PUSH $src1.lo\n\t" 7695 "PUSH $src2.hi\n\t" 7696 "PUSH $src2.lo\n\t" 7697 "CALL SharedRuntime::lrem\n\t" 7698 "ADD ESP,16" %} 7699 ins_encode( long_mod(src1,src2) ); 7700 ins_pipe( pipe_slow ); 7701 %} 7702 7703 // Divide Register Long (no special case since divisor != -1) 7704 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, 
eFlagsReg cr ) %{ 7705 match(Set dst (DivL dst imm)); 7706 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7707 ins_cost(1000); 7708 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" 7709 "XOR $tmp2,$tmp2\n\t" 7710 "CMP $tmp,EDX\n\t" 7711 "JA,s fast\n\t" 7712 "MOV $tmp2,EAX\n\t" 7713 "MOV EAX,EDX\n\t" 7714 "MOV EDX,0\n\t" 7715 "JLE,s pos\n\t" 7716 "LNEG EAX : $tmp2\n\t" 7717 "DIV $tmp # unsigned division\n\t" 7718 "XCHG EAX,$tmp2\n\t" 7719 "DIV $tmp\n\t" 7720 "LNEG $tmp2 : EAX\n\t" 7721 "JMP,s done\n" 7722 "pos:\n\t" 7723 "DIV $tmp\n\t" 7724 "XCHG EAX,$tmp2\n" 7725 "fast:\n\t" 7726 "DIV $tmp\n" 7727 "done:\n\t" 7728 "MOV EDX,$tmp2\n\t" 7729 "NEG EDX:EAX # if $imm < 0" %} 7730 ins_encode %{ 7731 int con = (int)$imm$$constant; 7732 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7733 int pcon = (con > 0) ? con : -con; 7734 Label Lfast, Lpos, Ldone; 7735 7736 __ movl($tmp$$Register, pcon); 7737 __ xorl($tmp2$$Register,$tmp2$$Register); 7738 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7739 __ jccb(Assembler::above, Lfast); // result fits into 32 bit 7740 7741 __ movl($tmp2$$Register, $dst$$Register); // save 7742 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7743 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7744 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7745 7746 // Negative dividend. 
7747 // convert value to positive to use unsigned division 7748 __ lneg($dst$$Register, $tmp2$$Register); 7749 __ divl($tmp$$Register); 7750 __ xchgl($dst$$Register, $tmp2$$Register); 7751 __ divl($tmp$$Register); 7752 // revert result back to negative 7753 __ lneg($tmp2$$Register, $dst$$Register); 7754 __ jmpb(Ldone); 7755 7756 __ bind(Lpos); 7757 __ divl($tmp$$Register); // Use unsigned division 7758 __ xchgl($dst$$Register, $tmp2$$Register); 7759 // Fallthrow for final divide, tmp2 has 32 bit hi result 7760 7761 __ bind(Lfast); 7762 // fast path: src is positive 7763 __ divl($tmp$$Register); // Use unsigned division 7764 7765 __ bind(Ldone); 7766 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7767 if (con < 0) { 7768 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7769 } 7770 %} 7771 ins_pipe( pipe_slow ); 7772 %} 7773 7774 // Remainder Register Long (remainder fit into 32 bits) 7775 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7776 match(Set dst (ModL dst imm)); 7777 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7778 ins_cost(1000); 7779 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7780 "CMP $tmp,EDX\n\t" 7781 "JA,s fast\n\t" 7782 "MOV $tmp2,EAX\n\t" 7783 "MOV EAX,EDX\n\t" 7784 "MOV EDX,0\n\t" 7785 "JLE,s pos\n\t" 7786 "LNEG EAX : $tmp2\n\t" 7787 "DIV $tmp # unsigned division\n\t" 7788 "MOV EAX,$tmp2\n\t" 7789 "DIV $tmp\n\t" 7790 "NEG EDX\n\t" 7791 "JMP,s done\n" 7792 "pos:\n\t" 7793 "DIV $tmp\n\t" 7794 "MOV EAX,$tmp2\n" 7795 "fast:\n\t" 7796 "DIV $tmp\n" 7797 "done:\n\t" 7798 "MOV EAX,EDX\n\t" 7799 "SAR EDX,31\n\t" %} 7800 ins_encode %{ 7801 int con = (int)$imm$$constant; 7802 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7803 int pcon = (con > 0) ? 
con : -con; 7804 Label Lfast, Lpos, Ldone; 7805 7806 __ movl($tmp$$Register, pcon); 7807 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7808 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7809 7810 __ movl($tmp2$$Register, $dst$$Register); // save 7811 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7812 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7813 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7814 7815 // Negative dividend. 7816 // convert value to positive to use unsigned division 7817 __ lneg($dst$$Register, $tmp2$$Register); 7818 __ divl($tmp$$Register); 7819 __ movl($dst$$Register, $tmp2$$Register); 7820 __ divl($tmp$$Register); 7821 // revert remainder back to negative 7822 __ negl(HIGH_FROM_LOW($dst$$Register)); 7823 __ jmpb(Ldone); 7824 7825 __ bind(Lpos); 7826 __ divl($tmp$$Register); 7827 __ movl($dst$$Register, $tmp2$$Register); 7828 7829 __ bind(Lfast); 7830 // fast path: src is positive 7831 __ divl($tmp$$Register); 7832 7833 __ bind(Ldone); 7834 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7835 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 7836 7837 %} 7838 ins_pipe( pipe_slow ); 7839 %} 7840 7841 // Integer Shift Instructions 7842 // Shift Left by one 7843 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7844 match(Set dst (LShiftI dst shift)); 7845 effect(KILL cr); 7846 7847 size(2); 7848 format %{ "SHL $dst,$shift" %} 7849 opcode(0xD1, 0x4); /* D1 /4 */ 7850 ins_encode( OpcP, RegOpc( dst ) ); 7851 ins_pipe( ialu_reg ); 7852 %} 7853 7854 // Shift Left by 8-bit immediate 7855 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7856 match(Set dst (LShiftI dst shift)); 7857 effect(KILL cr); 7858 7859 size(3); 7860 format %{ "SHL $dst,$shift" %} 7861 opcode(0xC1, 0x4); /* C1 /4 ib */ 7862 ins_encode( RegOpcImm( dst, shift) ); 7863 ins_pipe( ialu_reg ); 7864 %} 7865 7866 // Shift Left by variable 7867 instruct 
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7868 match(Set dst (LShiftI dst shift)); 7869 effect(KILL cr); 7870 7871 size(2); 7872 format %{ "SHL $dst,$shift" %} 7873 opcode(0xD3, 0x4); /* D3 /4 */ 7874 ins_encode( OpcP, RegOpc( dst ) ); 7875 ins_pipe( ialu_reg_reg ); 7876 %} 7877 7878 // Arithmetic shift right by one 7879 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7880 match(Set dst (RShiftI dst shift)); 7881 effect(KILL cr); 7882 7883 size(2); 7884 format %{ "SAR $dst,$shift" %} 7885 opcode(0xD1, 0x7); /* D1 /7 */ 7886 ins_encode( OpcP, RegOpc( dst ) ); 7887 ins_pipe( ialu_reg ); 7888 %} 7889 7890 // Arithmetic shift right by one 7891 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{ 7892 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7893 effect(KILL cr); 7894 format %{ "SAR $dst,$shift" %} 7895 opcode(0xD1, 0x7); /* D1 /7 */ 7896 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 7897 ins_pipe( ialu_mem_imm ); 7898 %} 7899 7900 // Arithmetic Shift Right by 8-bit immediate 7901 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7902 match(Set dst (RShiftI dst shift)); 7903 effect(KILL cr); 7904 7905 size(3); 7906 format %{ "SAR $dst,$shift" %} 7907 opcode(0xC1, 0x7); /* C1 /7 ib */ 7908 ins_encode( RegOpcImm( dst, shift ) ); 7909 ins_pipe( ialu_mem_imm ); 7910 %} 7911 7912 // Arithmetic Shift Right by 8-bit immediate 7913 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 7914 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7915 effect(KILL cr); 7916 7917 format %{ "SAR $dst,$shift" %} 7918 opcode(0xC1, 0x7); /* C1 /7 ib */ 7919 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 7920 ins_pipe( ialu_mem_imm ); 7921 %} 7922 7923 // Arithmetic Shift Right by variable 7924 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7925 match(Set dst (RShiftI dst shift)); 7926 effect(KILL cr); 7927 7928 size(2); 7929 format %{ "SAR $dst,$shift" %} 7930 
opcode(0xD3, 0x7); /* D3 /7 */ 7931 ins_encode( OpcP, RegOpc( dst ) ); 7932 ins_pipe( ialu_reg_reg ); 7933 %} 7934 7935 // Logical shift right by one 7936 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7937 match(Set dst (URShiftI dst shift)); 7938 effect(KILL cr); 7939 7940 size(2); 7941 format %{ "SHR $dst,$shift" %} 7942 opcode(0xD1, 0x5); /* D1 /5 */ 7943 ins_encode( OpcP, RegOpc( dst ) ); 7944 ins_pipe( ialu_reg ); 7945 %} 7946 7947 // Logical Shift Right by 8-bit immediate 7948 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7949 match(Set dst (URShiftI dst shift)); 7950 effect(KILL cr); 7951 7952 size(3); 7953 format %{ "SHR $dst,$shift" %} 7954 opcode(0xC1, 0x5); /* C1 /5 ib */ 7955 ins_encode( RegOpcImm( dst, shift) ); 7956 ins_pipe( ialu_reg ); 7957 %} 7958 7959 7960 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 7961 // This idiom is used by the compiler for the i2b bytecode. 7962 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 7963 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 7964 7965 size(3); 7966 format %{ "MOVSX $dst,$src :8" %} 7967 ins_encode %{ 7968 __ movsbl($dst$$Register, $src$$Register); 7969 %} 7970 ins_pipe(ialu_reg_reg); 7971 %} 7972 7973 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 7974 // This idiom is used by the compiler the i2s bytecode. 
7975 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ 7976 match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); 7977 7978 size(3); 7979 format %{ "MOVSX $dst,$src :16" %} 7980 ins_encode %{ 7981 __ movswl($dst$$Register, $src$$Register); 7982 %} 7983 ins_pipe(ialu_reg_reg); 7984 %} 7985 7986 7987 // Logical Shift Right by variable 7988 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7989 match(Set dst (URShiftI dst shift)); 7990 effect(KILL cr); 7991 7992 size(2); 7993 format %{ "SHR $dst,$shift" %} 7994 opcode(0xD3, 0x5); /* D3 /5 */ 7995 ins_encode( OpcP, RegOpc( dst ) ); 7996 ins_pipe( ialu_reg_reg ); 7997 %} 7998 7999 8000 //----------Logical Instructions----------------------------------------------- 8001 //----------Integer Logical Instructions--------------------------------------- 8002 // And Instructions 8003 // And Register with Register 8004 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8005 match(Set dst (AndI dst src)); 8006 effect(KILL cr); 8007 8008 size(2); 8009 format %{ "AND $dst,$src" %} 8010 opcode(0x23); 8011 ins_encode( OpcP, RegReg( dst, src) ); 8012 ins_pipe( ialu_reg_reg ); 8013 %} 8014 8015 // And Register with Immediate 8016 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8017 match(Set dst (AndI dst src)); 8018 effect(KILL cr); 8019 8020 format %{ "AND $dst,$src" %} 8021 opcode(0x81,0x04); /* Opcode 81 /4 */ 8022 // ins_encode( RegImm( dst, src) ); 8023 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8024 ins_pipe( ialu_reg ); 8025 %} 8026 8027 // And Register with Memory 8028 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8029 match(Set dst (AndI dst (LoadI src))); 8030 effect(KILL cr); 8031 8032 ins_cost(125); 8033 format %{ "AND $dst,$src" %} 8034 opcode(0x23); 8035 ins_encode( OpcP, RegMem( dst, src) ); 8036 ins_pipe( ialu_reg_mem ); 8037 %} 8038 8039 // And Memory with Register 8040 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8041 match(Set dst 
(StoreI dst (AndI (LoadI dst) src))); 8042 effect(KILL cr); 8043 8044 ins_cost(150); 8045 format %{ "AND $dst,$src" %} 8046 opcode(0x21); /* Opcode 21 /r */ 8047 ins_encode( OpcP, RegMem( src, dst ) ); 8048 ins_pipe( ialu_mem_reg ); 8049 %} 8050 8051 // And Memory with Immediate 8052 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8053 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8054 effect(KILL cr); 8055 8056 ins_cost(125); 8057 format %{ "AND $dst,$src" %} 8058 opcode(0x81, 0x4); /* Opcode 81 /4 id */ 8059 // ins_encode( MemImm( dst, src) ); 8060 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8061 ins_pipe( ialu_mem_imm ); 8062 %} 8063 8064 // BMI1 instructions 8065 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ 8066 match(Set dst (AndI (XorI src1 minus_1) src2)); 8067 predicate(UseBMI1Instructions); 8068 effect(KILL cr); 8069 8070 format %{ "ANDNL $dst, $src1, $src2" %} 8071 8072 ins_encode %{ 8073 __ andnl($dst$$Register, $src1$$Register, $src2$$Register); 8074 %} 8075 ins_pipe(ialu_reg); 8076 %} 8077 8078 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ 8079 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); 8080 predicate(UseBMI1Instructions); 8081 effect(KILL cr); 8082 8083 ins_cost(125); 8084 format %{ "ANDNL $dst, $src1, $src2" %} 8085 8086 ins_encode %{ 8087 __ andnl($dst$$Register, $src1$$Register, $src2$$Address); 8088 %} 8089 ins_pipe(ialu_reg_mem); 8090 %} 8091 8092 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{ 8093 match(Set dst (AndI (SubI imm_zero src) src)); 8094 predicate(UseBMI1Instructions); 8095 effect(KILL cr); 8096 8097 format %{ "BLSIL $dst, $src" %} 8098 8099 ins_encode %{ 8100 __ blsil($dst$$Register, $src$$Register); 8101 %} 8102 ins_pipe(ialu_reg); 8103 %} 8104 8105 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{ 8106 
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); 8107 predicate(UseBMI1Instructions); 8108 effect(KILL cr); 8109 8110 ins_cost(125); 8111 format %{ "BLSIL $dst, $src" %} 8112 8113 ins_encode %{ 8114 __ blsil($dst$$Register, $src$$Address); 8115 %} 8116 ins_pipe(ialu_reg_mem); 8117 %} 8118 8119 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8120 %{ 8121 match(Set dst (XorI (AddI src minus_1) src)); 8122 predicate(UseBMI1Instructions); 8123 effect(KILL cr); 8124 8125 format %{ "BLSMSKL $dst, $src" %} 8126 8127 ins_encode %{ 8128 __ blsmskl($dst$$Register, $src$$Register); 8129 %} 8130 8131 ins_pipe(ialu_reg); 8132 %} 8133 8134 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8135 %{ 8136 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8137 predicate(UseBMI1Instructions); 8138 effect(KILL cr); 8139 8140 ins_cost(125); 8141 format %{ "BLSMSKL $dst, $src" %} 8142 8143 ins_encode %{ 8144 __ blsmskl($dst$$Register, $src$$Address); 8145 %} 8146 8147 ins_pipe(ialu_reg_mem); 8148 %} 8149 8150 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8151 %{ 8152 match(Set dst (AndI (AddI src minus_1) src) ); 8153 predicate(UseBMI1Instructions); 8154 effect(KILL cr); 8155 8156 format %{ "BLSRL $dst, $src" %} 8157 8158 ins_encode %{ 8159 __ blsrl($dst$$Register, $src$$Register); 8160 %} 8161 8162 ins_pipe(ialu_reg); 8163 %} 8164 8165 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8166 %{ 8167 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8168 predicate(UseBMI1Instructions); 8169 effect(KILL cr); 8170 8171 ins_cost(125); 8172 format %{ "BLSRL $dst, $src" %} 8173 8174 ins_encode %{ 8175 __ blsrl($dst$$Register, $src$$Address); 8176 %} 8177 8178 ins_pipe(ialu_reg_mem); 8179 %} 8180 8181 // Or Instructions 8182 // Or Register with Register 8183 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8184 match(Set dst 
(OrI dst src)); 8185 effect(KILL cr); 8186 8187 size(2); 8188 format %{ "OR $dst,$src" %} 8189 opcode(0x0B); 8190 ins_encode( OpcP, RegReg( dst, src) ); 8191 ins_pipe( ialu_reg_reg ); 8192 %} 8193 8194 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8195 match(Set dst (OrI dst (CastP2X src))); 8196 effect(KILL cr); 8197 8198 size(2); 8199 format %{ "OR $dst,$src" %} 8200 opcode(0x0B); 8201 ins_encode( OpcP, RegReg( dst, src) ); 8202 ins_pipe( ialu_reg_reg ); 8203 %} 8204 8205 8206 // Or Register with Immediate 8207 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8208 match(Set dst (OrI dst src)); 8209 effect(KILL cr); 8210 8211 format %{ "OR $dst,$src" %} 8212 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8213 // ins_encode( RegImm( dst, src) ); 8214 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8215 ins_pipe( ialu_reg ); 8216 %} 8217 8218 // Or Register with Memory 8219 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8220 match(Set dst (OrI dst (LoadI src))); 8221 effect(KILL cr); 8222 8223 ins_cost(125); 8224 format %{ "OR $dst,$src" %} 8225 opcode(0x0B); 8226 ins_encode( OpcP, RegMem( dst, src) ); 8227 ins_pipe( ialu_reg_mem ); 8228 %} 8229 8230 // Or Memory with Register 8231 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8232 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8233 effect(KILL cr); 8234 8235 ins_cost(150); 8236 format %{ "OR $dst,$src" %} 8237 opcode(0x09); /* Opcode 09 /r */ 8238 ins_encode( OpcP, RegMem( src, dst ) ); 8239 ins_pipe( ialu_mem_reg ); 8240 %} 8241 8242 // Or Memory with Immediate 8243 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8244 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8245 effect(KILL cr); 8246 8247 ins_cost(125); 8248 format %{ "OR $dst,$src" %} 8249 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8250 // ins_encode( MemImm( dst, src) ); 8251 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8252 ins_pipe( ialu_mem_imm ); 
8253 %} 8254 8255 // ROL/ROR 8256 // ROL expand 8257 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8258 effect(USE_DEF dst, USE shift, KILL cr); 8259 8260 format %{ "ROL $dst, $shift" %} 8261 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8262 ins_encode( OpcP, RegOpc( dst )); 8263 ins_pipe( ialu_reg ); 8264 %} 8265 8266 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8267 effect(USE_DEF dst, USE shift, KILL cr); 8268 8269 format %{ "ROL $dst, $shift" %} 8270 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8271 ins_encode( RegOpcImm(dst, shift) ); 8272 ins_pipe(ialu_reg); 8273 %} 8274 8275 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8276 effect(USE_DEF dst, USE shift, KILL cr); 8277 8278 format %{ "ROL $dst, $shift" %} 8279 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8280 ins_encode(OpcP, RegOpc(dst)); 8281 ins_pipe( ialu_reg_reg ); 8282 %} 8283 // end of ROL expand 8284 8285 // ROL 32bit by one once 8286 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8287 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8288 8289 expand %{ 8290 rolI_eReg_imm1(dst, lshift, cr); 8291 %} 8292 %} 8293 8294 // ROL 32bit var by imm8 once 8295 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8296 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8297 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8298 8299 expand %{ 8300 rolI_eReg_imm8(dst, lshift, cr); 8301 %} 8302 %} 8303 8304 // ROL 32bit var by var once 8305 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8306 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8307 8308 expand %{ 8309 rolI_eReg_CL(dst, shift, cr); 8310 %} 8311 %} 8312 8313 // ROL 32bit var by var once 8314 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8315 match(Set dst ( OrI (LShiftI dst shift) (URShiftI 
dst (SubI c32 shift)))); 8316 8317 expand %{ 8318 rolI_eReg_CL(dst, shift, cr); 8319 %} 8320 %} 8321 8322 // ROR expand 8323 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8324 effect(USE_DEF dst, USE shift, KILL cr); 8325 8326 format %{ "ROR $dst, $shift" %} 8327 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8328 ins_encode( OpcP, RegOpc( dst ) ); 8329 ins_pipe( ialu_reg ); 8330 %} 8331 8332 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8333 effect (USE_DEF dst, USE shift, KILL cr); 8334 8335 format %{ "ROR $dst, $shift" %} 8336 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8337 ins_encode( RegOpcImm(dst, shift) ); 8338 ins_pipe( ialu_reg ); 8339 %} 8340 8341 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8342 effect(USE_DEF dst, USE shift, KILL cr); 8343 8344 format %{ "ROR $dst, $shift" %} 8345 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8346 ins_encode(OpcP, RegOpc(dst)); 8347 ins_pipe( ialu_reg_reg ); 8348 %} 8349 // end of ROR expand 8350 8351 // ROR right once 8352 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8353 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8354 8355 expand %{ 8356 rorI_eReg_imm1(dst, rshift, cr); 8357 %} 8358 %} 8359 8360 // ROR 32bit by immI8 once 8361 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8362 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8363 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8364 8365 expand %{ 8366 rorI_eReg_imm8(dst, rshift, cr); 8367 %} 8368 %} 8369 8370 // ROR 32bit var by var once 8371 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8372 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8373 8374 expand %{ 8375 rorI_eReg_CL(dst, shift, cr); 8376 %} 8377 %} 8378 8379 // ROR 32bit var by var once 8380 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, 
eFlagsReg cr) %{ 8381 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8382 8383 expand %{ 8384 rorI_eReg_CL(dst, shift, cr); 8385 %} 8386 %} 8387 8388 // Xor Instructions 8389 // Xor Register with Register 8390 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8391 match(Set dst (XorI dst src)); 8392 effect(KILL cr); 8393 8394 size(2); 8395 format %{ "XOR $dst,$src" %} 8396 opcode(0x33); 8397 ins_encode( OpcP, RegReg( dst, src) ); 8398 ins_pipe( ialu_reg_reg ); 8399 %} 8400 8401 // Xor Register with Immediate -1 8402 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8403 match(Set dst (XorI dst imm)); 8404 8405 size(2); 8406 format %{ "NOT $dst" %} 8407 ins_encode %{ 8408 __ notl($dst$$Register); 8409 %} 8410 ins_pipe( ialu_reg ); 8411 %} 8412 8413 // Xor Register with Immediate 8414 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8415 match(Set dst (XorI dst src)); 8416 effect(KILL cr); 8417 8418 format %{ "XOR $dst,$src" %} 8419 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8420 // ins_encode( RegImm( dst, src) ); 8421 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8422 ins_pipe( ialu_reg ); 8423 %} 8424 8425 // Xor Register with Memory 8426 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8427 match(Set dst (XorI dst (LoadI src))); 8428 effect(KILL cr); 8429 8430 ins_cost(125); 8431 format %{ "XOR $dst,$src" %} 8432 opcode(0x33); 8433 ins_encode( OpcP, RegMem(dst, src) ); 8434 ins_pipe( ialu_reg_mem ); 8435 %} 8436 8437 // Xor Memory with Register 8438 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8439 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8440 effect(KILL cr); 8441 8442 ins_cost(150); 8443 format %{ "XOR $dst,$src" %} 8444 opcode(0x31); /* Opcode 31 /r */ 8445 ins_encode( OpcP, RegMem( src, dst ) ); 8446 ins_pipe( ialu_mem_reg ); 8447 %} 8448 8449 // Xor Memory with Immediate 8450 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8451 match(Set dst (StoreI 
dst (XorI (LoadI dst) src))); 8452 effect(KILL cr); 8453 8454 ins_cost(125); 8455 format %{ "XOR $dst,$src" %} 8456 opcode(0x81,0x6); /* Opcode 81 /6 id */ 8457 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8458 ins_pipe( ialu_mem_imm ); 8459 %} 8460 8461 //----------Convert Int to Boolean--------------------------------------------- 8462 8463 instruct movI_nocopy(rRegI dst, rRegI src) %{ 8464 effect( DEF dst, USE src ); 8465 format %{ "MOV $dst,$src" %} 8466 ins_encode( enc_Copy( dst, src) ); 8467 ins_pipe( ialu_reg_reg ); 8468 %} 8469 8470 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8471 effect( USE_DEF dst, USE src, KILL cr ); 8472 8473 size(4); 8474 format %{ "NEG $dst\n\t" 8475 "ADC $dst,$src" %} 8476 ins_encode( neg_reg(dst), 8477 OpcRegReg(0x13,dst,src) ); 8478 ins_pipe( ialu_reg_reg_long ); 8479 %} 8480 8481 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8482 match(Set dst (Conv2B src)); 8483 8484 expand %{ 8485 movI_nocopy(dst,src); 8486 ci2b(dst,src,cr); 8487 %} 8488 %} 8489 8490 instruct movP_nocopy(rRegI dst, eRegP src) %{ 8491 effect( DEF dst, USE src ); 8492 format %{ "MOV $dst,$src" %} 8493 ins_encode( enc_Copy( dst, src) ); 8494 ins_pipe( ialu_reg_reg ); 8495 %} 8496 8497 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8498 effect( USE_DEF dst, USE src, KILL cr ); 8499 format %{ "NEG $dst\n\t" 8500 "ADC $dst,$src" %} 8501 ins_encode( neg_reg(dst), 8502 OpcRegReg(0x13,dst,src) ); 8503 ins_pipe( ialu_reg_reg_long ); 8504 %} 8505 8506 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8507 match(Set dst (Conv2B src)); 8508 8509 expand %{ 8510 movP_nocopy(dst,src); 8511 cp2b(dst,src,cr); 8512 %} 8513 %} 8514 8515 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ 8516 match(Set dst (CmpLTMask p q)); 8517 effect(KILL cr); 8518 ins_cost(400); 8519 8520 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 8521 format %{ "XOR $dst,$dst\n\t" 8522 "CMP 
$p,$q\n\t" 8523 "SETlt $dst\n\t" 8524 "NEG $dst" %} 8525 ins_encode %{ 8526 Register Rp = $p$$Register; 8527 Register Rq = $q$$Register; 8528 Register Rd = $dst$$Register; 8529 Label done; 8530 __ xorl(Rd, Rd); 8531 __ cmpl(Rp, Rq); 8532 __ setb(Assembler::less, Rd); 8533 __ negl(Rd); 8534 %} 8535 8536 ins_pipe(pipe_slow); 8537 %} 8538 8539 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{ 8540 match(Set dst (CmpLTMask dst zero)); 8541 effect(DEF dst, KILL cr); 8542 ins_cost(100); 8543 8544 format %{ "SAR $dst,31\t# cmpLTMask0" %} 8545 ins_encode %{ 8546 __ sarl($dst$$Register, 31); 8547 %} 8548 ins_pipe(ialu_reg); 8549 %} 8550 8551 /* better to save a register than avoid a branch */ 8552 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8553 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); 8554 effect(KILL cr); 8555 ins_cost(400); 8556 format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" 8557 "JGE done\n\t" 8558 "ADD $p,$y\n" 8559 "done: " %} 8560 ins_encode %{ 8561 Register Rp = $p$$Register; 8562 Register Rq = $q$$Register; 8563 Register Ry = $y$$Register; 8564 Label done; 8565 __ subl(Rp, Rq); 8566 __ jccb(Assembler::greaterEqual, done); 8567 __ addl(Rp, Ry); 8568 __ bind(done); 8569 %} 8570 8571 ins_pipe(pipe_cmplt); 8572 %} 8573 8574 /* better to save a register than avoid a branch */ 8575 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8576 match(Set y (AndI (CmpLTMask p q) y)); 8577 effect(KILL cr); 8578 8579 ins_cost(300); 8580 8581 format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" 8582 "JLT done\n\t" 8583 "XORL $y, $y\n" 8584 "done: " %} 8585 ins_encode %{ 8586 Register Rp = $p$$Register; 8587 Register Rq = $q$$Register; 8588 Register Ry = $y$$Register; 8589 Label done; 8590 __ cmpl(Rp, Rq); 8591 __ jccb(Assembler::less, done); 8592 __ xorl(Ry, Ry); 8593 __ bind(done); 8594 %} 8595 8596 ins_pipe(pipe_cmplt); 8597 %} 8598 8599 /* If I enable this, I encourage spilling in the inner loop of compress. 
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------

// Overflow-checked add: the ADD is executed for real (OF set on overflow),
// so op1 is USE_KILLed; only the flags result is consumed.
instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow-checked add with immediate; op1 is clobbered as above.
instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow-checked subtract: CMP performs the subtraction for the flags
// only, so no operand is clobbered and no effect clause is needed.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow-checked subtract with immediate; flags-only via CMP.
instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow-checked negate (0 - op2): NEG mutates op2, hence USE_KILL.
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow-checked multiply: two-operand IMUL writes op1 (USE_KILL).
instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Overflow-checked multiply by immediate: three-operand IMUL writes the
// result into a scratch register, leaving op1 intact (TEMP tmp).
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
// 64-bit add on a 32-bit machine: ADD on the low halves, ADC on the high
// halves to propagate the carry.
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory (high half read from $mem+4).
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
8726 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8727 match(Set dst (SubL dst src)); 8728 effect(KILL cr); 8729 ins_cost(200); 8730 format %{ "SUB $dst.lo,$src.lo\n\t" 8731 "SBB $dst.hi,$src.hi" %} 8732 opcode(0x2B, 0x1B); 8733 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8734 ins_pipe( ialu_reg_reg_long ); 8735 %} 8736 8737 // Subtract Long Register with Immediate 8738 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8739 match(Set dst (SubL dst src)); 8740 effect(KILL cr); 8741 format %{ "SUB $dst.lo,$src.lo\n\t" 8742 "SBB $dst.hi,$src.hi" %} 8743 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8744 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8745 ins_pipe( ialu_reg_long ); 8746 %} 8747 8748 // Subtract Long Register with Memory 8749 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8750 match(Set dst (SubL dst (LoadL mem))); 8751 effect(KILL cr); 8752 ins_cost(125); 8753 format %{ "SUB $dst.lo,$mem\n\t" 8754 "SBB $dst.hi,$mem+4" %} 8755 opcode(0x2B, 0x1B); 8756 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8757 ins_pipe( ialu_reg_long_mem ); 8758 %} 8759 8760 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8761 match(Set dst (SubL zero dst)); 8762 effect(KILL cr); 8763 ins_cost(300); 8764 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8765 ins_encode( neg_long(dst) ); 8766 ins_pipe( ialu_reg_reg_long ); 8767 %} 8768 8769 // And Long Register with Register 8770 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8771 match(Set dst (AndL dst src)); 8772 effect(KILL cr); 8773 format %{ "AND $dst.lo,$src.lo\n\t" 8774 "AND $dst.hi,$src.hi" %} 8775 opcode(0x23,0x23); 8776 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8777 ins_pipe( ialu_reg_reg_long ); 8778 %} 8779 8780 // And Long Register with Immediate 8781 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8782 match(Set dst (AndL dst src)); 8783 effect(KILL 
cr); 8784 format %{ "AND $dst.lo,$src.lo\n\t" 8785 "AND $dst.hi,$src.hi" %} 8786 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8787 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8788 ins_pipe( ialu_reg_long ); 8789 %} 8790 8791 // And Long Register with Memory 8792 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8793 match(Set dst (AndL dst (LoadL mem))); 8794 effect(KILL cr); 8795 ins_cost(125); 8796 format %{ "AND $dst.lo,$mem\n\t" 8797 "AND $dst.hi,$mem+4" %} 8798 opcode(0x23, 0x23); 8799 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8800 ins_pipe( ialu_reg_long_mem ); 8801 %} 8802 8803 // BMI1 instructions 8804 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8805 match(Set dst (AndL (XorL src1 minus_1) src2)); 8806 predicate(UseBMI1Instructions); 8807 effect(KILL cr, TEMP dst); 8808 8809 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 8810 "ANDNL $dst.hi, $src1.hi, $src2.hi" 8811 %} 8812 8813 ins_encode %{ 8814 Register Rdst = $dst$$Register; 8815 Register Rsrc1 = $src1$$Register; 8816 Register Rsrc2 = $src2$$Register; 8817 __ andnl(Rdst, Rsrc1, Rsrc2); 8818 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 8819 %} 8820 ins_pipe(ialu_reg_reg_long); 8821 %} 8822 8823 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 8824 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 8825 predicate(UseBMI1Instructions); 8826 effect(KILL cr, TEMP dst); 8827 8828 ins_cost(125); 8829 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 8830 "ANDNL $dst.hi, $src1.hi, $src2+4" 8831 %} 8832 8833 ins_encode %{ 8834 Register Rdst = $dst$$Register; 8835 Register Rsrc1 = $src1$$Register; 8836 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 8837 8838 __ andnl(Rdst, Rsrc1, $src2$$Address); 8839 __ andnl(HIGH_FROM_LOW(Rdst), 
HIGH_FROM_LOW(Rsrc1), src2_hi); 8840 %} 8841 ins_pipe(ialu_reg_mem); 8842 %} 8843 8844 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 8845 match(Set dst (AndL (SubL imm_zero src) src)); 8846 predicate(UseBMI1Instructions); 8847 effect(KILL cr, TEMP dst); 8848 8849 format %{ "MOVL $dst.hi, 0\n\t" 8850 "BLSIL $dst.lo, $src.lo\n\t" 8851 "JNZ done\n\t" 8852 "BLSIL $dst.hi, $src.hi\n" 8853 "done:" 8854 %} 8855 8856 ins_encode %{ 8857 Label done; 8858 Register Rdst = $dst$$Register; 8859 Register Rsrc = $src$$Register; 8860 __ movl(HIGH_FROM_LOW(Rdst), 0); 8861 __ blsil(Rdst, Rsrc); 8862 __ jccb(Assembler::notZero, done); 8863 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8864 __ bind(done); 8865 %} 8866 ins_pipe(ialu_reg); 8867 %} 8868 8869 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 8870 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 8871 predicate(UseBMI1Instructions); 8872 effect(KILL cr, TEMP dst); 8873 8874 ins_cost(125); 8875 format %{ "MOVL $dst.hi, 0\n\t" 8876 "BLSIL $dst.lo, $src\n\t" 8877 "JNZ done\n\t" 8878 "BLSIL $dst.hi, $src+4\n" 8879 "done:" 8880 %} 8881 8882 ins_encode %{ 8883 Label done; 8884 Register Rdst = $dst$$Register; 8885 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8886 8887 __ movl(HIGH_FROM_LOW(Rdst), 0); 8888 __ blsil(Rdst, $src$$Address); 8889 __ jccb(Assembler::notZero, done); 8890 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 8891 __ bind(done); 8892 %} 8893 ins_pipe(ialu_reg_mem); 8894 %} 8895 8896 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8897 %{ 8898 match(Set dst (XorL (AddL src minus_1) src)); 8899 predicate(UseBMI1Instructions); 8900 effect(KILL cr, TEMP dst); 8901 8902 format %{ "MOVL $dst.hi, 0\n\t" 8903 "BLSMSKL $dst.lo, $src.lo\n\t" 8904 "JNC done\n\t" 8905 "BLSMSKL $dst.hi, $src.hi\n" 8906 "done:" 8907 %} 8908 8909 ins_encode %{ 8910 Label done; 
8911 Register Rdst = $dst$$Register; 8912 Register Rsrc = $src$$Register; 8913 __ movl(HIGH_FROM_LOW(Rdst), 0); 8914 __ blsmskl(Rdst, Rsrc); 8915 __ jccb(Assembler::carryClear, done); 8916 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8917 __ bind(done); 8918 %} 8919 8920 ins_pipe(ialu_reg); 8921 %} 8922 8923 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8924 %{ 8925 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 8926 predicate(UseBMI1Instructions); 8927 effect(KILL cr, TEMP dst); 8928 8929 ins_cost(125); 8930 format %{ "MOVL $dst.hi, 0\n\t" 8931 "BLSMSKL $dst.lo, $src\n\t" 8932 "JNC done\n\t" 8933 "BLSMSKL $dst.hi, $src+4\n" 8934 "done:" 8935 %} 8936 8937 ins_encode %{ 8938 Label done; 8939 Register Rdst = $dst$$Register; 8940 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8941 8942 __ movl(HIGH_FROM_LOW(Rdst), 0); 8943 __ blsmskl(Rdst, $src$$Address); 8944 __ jccb(Assembler::carryClear, done); 8945 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 8946 __ bind(done); 8947 %} 8948 8949 ins_pipe(ialu_reg_mem); 8950 %} 8951 8952 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8953 %{ 8954 match(Set dst (AndL (AddL src minus_1) src) ); 8955 predicate(UseBMI1Instructions); 8956 effect(KILL cr, TEMP dst); 8957 8958 format %{ "MOVL $dst.hi, $src.hi\n\t" 8959 "BLSRL $dst.lo, $src.lo\n\t" 8960 "JNC done\n\t" 8961 "BLSRL $dst.hi, $src.hi\n" 8962 "done:" 8963 %} 8964 8965 ins_encode %{ 8966 Label done; 8967 Register Rdst = $dst$$Register; 8968 Register Rsrc = $src$$Register; 8969 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8970 __ blsrl(Rdst, Rsrc); 8971 __ jccb(Assembler::carryClear, done); 8972 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8973 __ bind(done); 8974 %} 8975 8976 ins_pipe(ialu_reg); 8977 %} 8978 8979 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8980 %{ 8981 match(Set dst (AndL 
(AddL (LoadL src) minus_1) (LoadL src) )); 8982 predicate(UseBMI1Instructions); 8983 effect(KILL cr, TEMP dst); 8984 8985 ins_cost(125); 8986 format %{ "MOVL $dst.hi, $src+4\n\t" 8987 "BLSRL $dst.lo, $src\n\t" 8988 "JNC done\n\t" 8989 "BLSRL $dst.hi, $src+4\n" 8990 "done:" 8991 %} 8992 8993 ins_encode %{ 8994 Label done; 8995 Register Rdst = $dst$$Register; 8996 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8997 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 8998 __ blsrl(Rdst, $src$$Address); 8999 __ jccb(Assembler::carryClear, done); 9000 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9001 __ bind(done); 9002 %} 9003 9004 ins_pipe(ialu_reg_mem); 9005 %} 9006 9007 // Or Long Register with Register 9008 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9009 match(Set dst (OrL dst src)); 9010 effect(KILL cr); 9011 format %{ "OR $dst.lo,$src.lo\n\t" 9012 "OR $dst.hi,$src.hi" %} 9013 opcode(0x0B,0x0B); 9014 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9015 ins_pipe( ialu_reg_reg_long ); 9016 %} 9017 9018 // Or Long Register with Immediate 9019 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9020 match(Set dst (OrL dst src)); 9021 effect(KILL cr); 9022 format %{ "OR $dst.lo,$src.lo\n\t" 9023 "OR $dst.hi,$src.hi" %} 9024 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9025 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9026 ins_pipe( ialu_reg_long ); 9027 %} 9028 9029 // Or Long Register with Memory 9030 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9031 match(Set dst (OrL dst (LoadL mem))); 9032 effect(KILL cr); 9033 ins_cost(125); 9034 format %{ "OR $dst.lo,$mem\n\t" 9035 "OR $dst.hi,$mem+4" %} 9036 opcode(0x0B,0x0B); 9037 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9038 ins_pipe( ialu_reg_long_mem ); 9039 %} 9040 9041 // Xor Long Register with Register 9042 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9043 
match(Set dst (XorL dst src)); 9044 effect(KILL cr); 9045 format %{ "XOR $dst.lo,$src.lo\n\t" 9046 "XOR $dst.hi,$src.hi" %} 9047 opcode(0x33,0x33); 9048 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9049 ins_pipe( ialu_reg_reg_long ); 9050 %} 9051 9052 // Xor Long Register with Immediate -1 9053 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9054 match(Set dst (XorL dst imm)); 9055 format %{ "NOT $dst.lo\n\t" 9056 "NOT $dst.hi" %} 9057 ins_encode %{ 9058 __ notl($dst$$Register); 9059 __ notl(HIGH_FROM_LOW($dst$$Register)); 9060 %} 9061 ins_pipe( ialu_reg_long ); 9062 %} 9063 9064 // Xor Long Register with Immediate 9065 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9066 match(Set dst (XorL dst src)); 9067 effect(KILL cr); 9068 format %{ "XOR $dst.lo,$src.lo\n\t" 9069 "XOR $dst.hi,$src.hi" %} 9070 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9071 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9072 ins_pipe( ialu_reg_long ); 9073 %} 9074 9075 // Xor Long Register with Memory 9076 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9077 match(Set dst (XorL dst (LoadL mem))); 9078 effect(KILL cr); 9079 ins_cost(125); 9080 format %{ "XOR $dst.lo,$mem\n\t" 9081 "XOR $dst.hi,$mem+4" %} 9082 opcode(0x33,0x33); 9083 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9084 ins_pipe( ialu_reg_long_mem ); 9085 %} 9086 9087 // Shift Left Long by 1 9088 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9089 predicate(UseNewLongLShift); 9090 match(Set dst (LShiftL dst cnt)); 9091 effect(KILL cr); 9092 ins_cost(100); 9093 format %{ "ADD $dst.lo,$dst.lo\n\t" 9094 "ADC $dst.hi,$dst.hi" %} 9095 ins_encode %{ 9096 __ addl($dst$$Register,$dst$$Register); 9097 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9098 %} 9099 ins_pipe( ialu_reg_long ); 9100 %} 9101 9102 // Shift Left Long by 2 9103 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9104 
predicate(UseNewLongLShift); 9105 match(Set dst (LShiftL dst cnt)); 9106 effect(KILL cr); 9107 ins_cost(100); 9108 format %{ "ADD $dst.lo,$dst.lo\n\t" 9109 "ADC $dst.hi,$dst.hi\n\t" 9110 "ADD $dst.lo,$dst.lo\n\t" 9111 "ADC $dst.hi,$dst.hi" %} 9112 ins_encode %{ 9113 __ addl($dst$$Register,$dst$$Register); 9114 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9115 __ addl($dst$$Register,$dst$$Register); 9116 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9117 %} 9118 ins_pipe( ialu_reg_long ); 9119 %} 9120 9121 // Shift Left Long by 3 9122 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9123 predicate(UseNewLongLShift); 9124 match(Set dst (LShiftL dst cnt)); 9125 effect(KILL cr); 9126 ins_cost(100); 9127 format %{ "ADD $dst.lo,$dst.lo\n\t" 9128 "ADC $dst.hi,$dst.hi\n\t" 9129 "ADD $dst.lo,$dst.lo\n\t" 9130 "ADC $dst.hi,$dst.hi\n\t" 9131 "ADD $dst.lo,$dst.lo\n\t" 9132 "ADC $dst.hi,$dst.hi" %} 9133 ins_encode %{ 9134 __ addl($dst$$Register,$dst$$Register); 9135 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9136 __ addl($dst$$Register,$dst$$Register); 9137 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9138 __ addl($dst$$Register,$dst$$Register); 9139 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9140 %} 9141 ins_pipe( ialu_reg_long ); 9142 %} 9143 9144 // Shift Left Long by 1-31 9145 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9146 match(Set dst (LShiftL dst cnt)); 9147 effect(KILL cr); 9148 ins_cost(200); 9149 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9150 "SHL $dst.lo,$cnt" %} 9151 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9152 ins_encode( move_long_small_shift(dst,cnt) ); 9153 ins_pipe( ialu_reg_long ); 9154 %} 9155 9156 // Shift Left Long by 32-63 9157 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9158 match(Set dst (LShiftL dst cnt)); 9159 effect(KILL cr); 9160 ins_cost(300); 9161 
format %{ "MOV $dst.hi,$dst.lo\n" 9162 "\tSHL $dst.hi,$cnt-32\n" 9163 "\tXOR $dst.lo,$dst.lo" %} 9164 opcode(0xC1, 0x4); /* C1 /4 ib */ 9165 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9166 ins_pipe( ialu_reg_long ); 9167 %} 9168 9169 // Shift Left Long by variable 9170 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9171 match(Set dst (LShiftL dst shift)); 9172 effect(KILL cr); 9173 ins_cost(500+200); 9174 size(17); 9175 format %{ "TEST $shift,32\n\t" 9176 "JEQ,s small\n\t" 9177 "MOV $dst.hi,$dst.lo\n\t" 9178 "XOR $dst.lo,$dst.lo\n" 9179 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9180 "SHL $dst.lo,$shift" %} 9181 ins_encode( shift_left_long( dst, shift ) ); 9182 ins_pipe( pipe_slow ); 9183 %} 9184 9185 // Shift Right Long by 1-31 9186 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9187 match(Set dst (URShiftL dst cnt)); 9188 effect(KILL cr); 9189 ins_cost(200); 9190 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9191 "SHR $dst.hi,$cnt" %} 9192 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9193 ins_encode( move_long_small_shift(dst,cnt) ); 9194 ins_pipe( ialu_reg_long ); 9195 %} 9196 9197 // Shift Right Long by 32-63 9198 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9199 match(Set dst (URShiftL dst cnt)); 9200 effect(KILL cr); 9201 ins_cost(300); 9202 format %{ "MOV $dst.lo,$dst.hi\n" 9203 "\tSHR $dst.lo,$cnt-32\n" 9204 "\tXOR $dst.hi,$dst.hi" %} 9205 opcode(0xC1, 0x5); /* C1 /5 ib */ 9206 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9207 ins_pipe( ialu_reg_long ); 9208 %} 9209 9210 // Shift Right Long by variable 9211 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9212 match(Set dst (URShiftL dst shift)); 9213 effect(KILL cr); 9214 ins_cost(600); 9215 size(17); 9216 format %{ "TEST $shift,32\n\t" 9217 "JEQ,s small\n\t" 9218 "MOV $dst.lo,$dst.hi\n\t" 9219 "XOR $dst.hi,$dst.hi\n" 9220 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9221 "SHR $dst.hi,$shift" %} 9222 ins_encode( 
shift_right_long( dst, shift ) ); 9223 ins_pipe( pipe_slow ); 9224 %} 9225 9226 // Shift Right Long by 1-31 9227 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9228 match(Set dst (RShiftL dst cnt)); 9229 effect(KILL cr); 9230 ins_cost(200); 9231 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9232 "SAR $dst.hi,$cnt" %} 9233 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9234 ins_encode( move_long_small_shift(dst,cnt) ); 9235 ins_pipe( ialu_reg_long ); 9236 %} 9237 9238 // Shift Right Long by 32-63 9239 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9240 match(Set dst (RShiftL dst cnt)); 9241 effect(KILL cr); 9242 ins_cost(300); 9243 format %{ "MOV $dst.lo,$dst.hi\n" 9244 "\tSAR $dst.lo,$cnt-32\n" 9245 "\tSAR $dst.hi,31" %} 9246 opcode(0xC1, 0x7); /* C1 /7 ib */ 9247 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9248 ins_pipe( ialu_reg_long ); 9249 %} 9250 9251 // Shift Right arithmetic Long by variable 9252 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9253 match(Set dst (RShiftL dst shift)); 9254 effect(KILL cr); 9255 ins_cost(600); 9256 size(18); 9257 format %{ "TEST $shift,32\n\t" 9258 "JEQ,s small\n\t" 9259 "MOV $dst.lo,$dst.hi\n\t" 9260 "SAR $dst.hi,31\n" 9261 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9262 "SAR $dst.hi,$shift" %} 9263 ins_encode( shift_right_arith_long( dst, shift ) ); 9264 ins_pipe( pipe_slow ); 9265 %} 9266 9267 9268 //----------Double Instructions------------------------------------------------ 9269 // Double Math 9270 9271 // Compare & branch 9272 9273 // P6 version of float compare, sets condition codes in EFLAGS 9274 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9275 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9276 match(Set cr (CmpD src1 src2)); 9277 effect(KILL rax); 9278 ins_cost(150); 9279 format %{ "FLD $src1\n\t" 9280 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9281 "JNP exit\n\t" 9282 "MOV ah,1 // saw a NaN, set CF\n\t" 9283 
"SAHF\n" 9284 "exit:\tNOP // avoid branch to branch" %} 9285 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9286 ins_encode( Push_Reg_DPR(src1), 9287 OpcP, RegOpc(src2), 9288 cmpF_P6_fixup ); 9289 ins_pipe( pipe_slow ); 9290 %} 9291 9292 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9293 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9294 match(Set cr (CmpD src1 src2)); 9295 ins_cost(150); 9296 format %{ "FLD $src1\n\t" 9297 "FUCOMIP ST,$src2 // P6 instruction" %} 9298 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9299 ins_encode( Push_Reg_DPR(src1), 9300 OpcP, RegOpc(src2)); 9301 ins_pipe( pipe_slow ); 9302 %} 9303 9304 // Compare & branch 9305 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9306 predicate(UseSSE<=1); 9307 match(Set cr (CmpD src1 src2)); 9308 effect(KILL rax); 9309 ins_cost(200); 9310 format %{ "FLD $src1\n\t" 9311 "FCOMp $src2\n\t" 9312 "FNSTSW AX\n\t" 9313 "TEST AX,0x400\n\t" 9314 "JZ,s flags\n\t" 9315 "MOV AH,1\t# unordered treat as LT\n" 9316 "flags:\tSAHF" %} 9317 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9318 ins_encode( Push_Reg_DPR(src1), 9319 OpcP, RegOpc(src2), 9320 fpu_flags); 9321 ins_pipe( pipe_slow ); 9322 %} 9323 9324 // Compare vs zero into -1,0,1 9325 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9326 predicate(UseSSE<=1); 9327 match(Set dst (CmpD3 src1 zero)); 9328 effect(KILL cr, KILL rax); 9329 ins_cost(280); 9330 format %{ "FTSTD $dst,$src1" %} 9331 opcode(0xE4, 0xD9); 9332 ins_encode( Push_Reg_DPR(src1), 9333 OpcS, OpcP, PopFPU, 9334 CmpF_Result(dst)); 9335 ins_pipe( pipe_slow ); 9336 %} 9337 9338 // Compare into -1,0,1 9339 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9340 predicate(UseSSE<=1); 9341 match(Set dst (CmpD3 src1 src2)); 9342 effect(KILL cr, KILL rax); 9343 ins_cost(300); 9344 format %{ "FCMPD $dst,$src1,$src2" %} 9345 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9346 ins_encode( 
Push_Reg_DPR(src1), 9347 OpcP, RegOpc(src2), 9348 CmpF_Result(dst)); 9349 ins_pipe( pipe_slow ); 9350 %} 9351 9352 // float compare and set condition codes in EFLAGS by XMM regs 9353 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9354 predicate(UseSSE>=2); 9355 match(Set cr (CmpD src1 src2)); 9356 ins_cost(145); 9357 format %{ "UCOMISD $src1,$src2\n\t" 9358 "JNP,s exit\n\t" 9359 "PUSHF\t# saw NaN, set CF\n\t" 9360 "AND [rsp], #0xffffff2b\n\t" 9361 "POPF\n" 9362 "exit:" %} 9363 ins_encode %{ 9364 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9365 emit_cmpfp_fixup(_masm); 9366 %} 9367 ins_pipe( pipe_slow ); 9368 %} 9369 9370 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9371 predicate(UseSSE>=2); 9372 match(Set cr (CmpD src1 src2)); 9373 ins_cost(100); 9374 format %{ "UCOMISD $src1,$src2" %} 9375 ins_encode %{ 9376 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9377 %} 9378 ins_pipe( pipe_slow ); 9379 %} 9380 9381 // float compare and set condition codes in EFLAGS by XMM regs 9382 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9383 predicate(UseSSE>=2); 9384 match(Set cr (CmpD src1 (LoadD src2))); 9385 ins_cost(145); 9386 format %{ "UCOMISD $src1,$src2\n\t" 9387 "JNP,s exit\n\t" 9388 "PUSHF\t# saw NaN, set CF\n\t" 9389 "AND [rsp], #0xffffff2b\n\t" 9390 "POPF\n" 9391 "exit:" %} 9392 ins_encode %{ 9393 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9394 emit_cmpfp_fixup(_masm); 9395 %} 9396 ins_pipe( pipe_slow ); 9397 %} 9398 9399 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9400 predicate(UseSSE>=2); 9401 match(Set cr (CmpD src1 (LoadD src2))); 9402 ins_cost(100); 9403 format %{ "UCOMISD $src1,$src2" %} 9404 ins_encode %{ 9405 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9406 %} 9407 ins_pipe( pipe_slow ); 9408 %} 9409 9410 // Compare into -1,0,1 in XMM 9411 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9412 predicate(UseSSE>=2); 9413 match(Set dst (CmpD3 src1 src2)); 
9414 effect(KILL cr); 9415 ins_cost(255); 9416 format %{ "UCOMISD $src1, $src2\n\t" 9417 "MOV $dst, #-1\n\t" 9418 "JP,s done\n\t" 9419 "JB,s done\n\t" 9420 "SETNE $dst\n\t" 9421 "MOVZB $dst, $dst\n" 9422 "done:" %} 9423 ins_encode %{ 9424 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9425 emit_cmpfp3(_masm, $dst$$Register); 9426 %} 9427 ins_pipe( pipe_slow ); 9428 %} 9429 9430 // Compare into -1,0,1 in XMM and memory 9431 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9432 predicate(UseSSE>=2); 9433 match(Set dst (CmpD3 src1 (LoadD src2))); 9434 effect(KILL cr); 9435 ins_cost(275); 9436 format %{ "UCOMISD $src1, $src2\n\t" 9437 "MOV $dst, #-1\n\t" 9438 "JP,s done\n\t" 9439 "JB,s done\n\t" 9440 "SETNE $dst\n\t" 9441 "MOVZB $dst, $dst\n" 9442 "done:" %} 9443 ins_encode %{ 9444 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9445 emit_cmpfp3(_masm, $dst$$Register); 9446 %} 9447 ins_pipe( pipe_slow ); 9448 %} 9449 9450 9451 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9452 predicate (UseSSE <=1); 9453 match(Set dst (SubD dst src)); 9454 9455 format %{ "FLD $src\n\t" 9456 "DSUBp $dst,ST" %} 9457 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9458 ins_cost(150); 9459 ins_encode( Push_Reg_DPR(src), 9460 OpcP, RegOpc(dst) ); 9461 ins_pipe( fpu_reg_reg ); 9462 %} 9463 9464 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9465 predicate (UseSSE <=1); 9466 match(Set dst (RoundDouble (SubD src1 src2))); 9467 ins_cost(250); 9468 9469 format %{ "FLD $src2\n\t" 9470 "DSUB ST,$src1\n\t" 9471 "FSTP_D $dst\t# D-round" %} 9472 opcode(0xD8, 0x5); 9473 ins_encode( Push_Reg_DPR(src2), 9474 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9475 ins_pipe( fpu_mem_reg_reg ); 9476 %} 9477 9478 9479 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9480 predicate (UseSSE <=1); 9481 match(Set dst (SubD dst (LoadD src))); 9482 ins_cost(150); 9483 9484 format %{ "FLD $src\n\t" 9485 "DSUBp $dst,ST" %} 9486 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9487 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9488 OpcP, RegOpc(dst) ); 9489 ins_pipe( fpu_reg_mem ); 9490 %} 9491 9492 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9493 predicate (UseSSE<=1); 9494 match(Set dst (AbsD src)); 9495 ins_cost(100); 9496 format %{ "FABS" %} 9497 opcode(0xE1, 0xD9); 9498 ins_encode( OpcS, OpcP ); 9499 ins_pipe( fpu_reg_reg ); 9500 %} 9501 9502 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9503 predicate(UseSSE<=1); 9504 match(Set dst (NegD src)); 9505 ins_cost(100); 9506 format %{ "FCHS" %} 9507 opcode(0xE0, 0xD9); 9508 ins_encode( OpcS, OpcP ); 9509 ins_pipe( fpu_reg_reg ); 9510 %} 9511 9512 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9513 predicate(UseSSE<=1); 9514 match(Set dst (AddD dst src)); 9515 format %{ "FLD $src\n\t" 9516 "DADD $dst,ST" %} 9517 size(4); 9518 ins_cost(150); 9519 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9520 ins_encode( Push_Reg_DPR(src), 9521 OpcP, RegOpc(dst) ); 9522 ins_pipe( fpu_reg_reg ); 9523 %} 9524 9525 9526 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9527 predicate(UseSSE<=1); 9528 match(Set dst (RoundDouble (AddD src1 src2))); 9529 ins_cost(250); 9530 9531 format %{ "FLD $src2\n\t" 9532 "DADD ST,$src1\n\t" 9533 "FSTP_D $dst\t# D-round" %} 9534 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9535 ins_encode( Push_Reg_DPR(src2), 9536 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9537 ins_pipe( fpu_mem_reg_reg ); 9538 %} 9539 9540 9541 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9542 predicate(UseSSE<=1); 9543 match(Set dst (AddD dst (LoadD src))); 9544 ins_cost(150); 9545 9546 format %{ "FLD $src\n\t" 9547 "DADDp $dst,ST" %} 9548 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9549 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9550 OpcP, RegOpc(dst) ); 9551 ins_pipe( fpu_reg_mem ); 9552 %} 9553 9554 // add-to-memory 9555 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9556 predicate(UseSSE<=1); 9557 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9558 ins_cost(150); 9559 9560 format %{ "FLD_D $dst\n\t" 9561 "DADD ST,$src\n\t" 9562 "FST_D $dst" %} 9563 opcode(0xDD, 0x0); 9564 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9565 Opcode(0xD8), RegOpc(src), 9566 set_instruction_start, 9567 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9568 ins_pipe( fpu_reg_mem ); 9569 %} 9570 9571 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9572 predicate(UseSSE<=1); 9573 match(Set dst (AddD dst con)); 9574 ins_cost(125); 9575 format %{ "FLD1\n\t" 9576 "DADDp $dst,ST" %} 9577 ins_encode %{ 9578 __ fld1(); 9579 __ faddp($dst$$reg); 9580 %} 9581 ins_pipe(fpu_reg); 9582 %} 9583 9584 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9585 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9586 match(Set dst (AddD dst con)); 9587 ins_cost(200); 9588 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9589 "DADDp $dst,ST" %} 9590 ins_encode %{ 9591 __ fld_d($constantaddress($con)); 9592 __ faddp($dst$$reg); 9593 %} 9594 ins_pipe(fpu_reg_mem); 9595 %} 9596 9597 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9598 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9599 match(Set dst (RoundDouble (AddD src con))); 9600 ins_cost(200); 9601 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9602 "DADD ST,$src\n\t" 9603 "FSTP_D $dst\t# D-round" %} 9604 ins_encode %{ 9605 __ fld_d($constantaddress($con)); 9606 __ fadd($src$$reg); 9607 __ fstp_d(Address(rsp, $dst$$disp)); 9608 %} 9609 ins_pipe(fpu_mem_reg_con); 9610 %} 9611 9612 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9613 predicate(UseSSE<=1); 9614 match(Set dst (MulD dst src)); 9615 format %{ "FLD $src\n\t" 9616 "DMULp $dst,ST" %} 9617 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9618 ins_cost(150); 9619 ins_encode( Push_Reg_DPR(src), 9620 OpcP, RegOpc(dst) ); 9621 ins_pipe( 
fpu_reg_reg ); 9622 %} 9623 9624 // Strict FP instruction biases argument before multiply then 9625 // biases result to avoid double rounding of subnormals. 9626 // 9627 // scale arg1 by multiplying arg1 by 2^(-15360) 9628 // load arg2 9629 // multiply scaled arg1 by arg2 9630 // rescale product by 2^(15360) 9631 // 9632 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9633 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9634 match(Set dst (MulD dst src)); 9635 ins_cost(1); // Select this instruction for all strict FP double multiplies 9636 9637 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9638 "DMULp $dst,ST\n\t" 9639 "FLD $src\n\t" 9640 "DMULp $dst,ST\n\t" 9641 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9642 "DMULp $dst,ST\n\t" %} 9643 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9644 ins_encode( strictfp_bias1(dst), 9645 Push_Reg_DPR(src), 9646 OpcP, RegOpc(dst), 9647 strictfp_bias2(dst) ); 9648 ins_pipe( fpu_reg_reg ); 9649 %} 9650 9651 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9652 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9653 match(Set dst (MulD dst con)); 9654 ins_cost(200); 9655 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9656 "DMULp $dst,ST" %} 9657 ins_encode %{ 9658 __ fld_d($constantaddress($con)); 9659 __ fmulp($dst$$reg); 9660 %} 9661 ins_pipe(fpu_reg_mem); 9662 %} 9663 9664 9665 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9666 predicate( UseSSE<=1 ); 9667 match(Set dst (MulD dst (LoadD src))); 9668 ins_cost(200); 9669 format %{ "FLD_D $src\n\t" 9670 "DMULp $dst,ST" %} 9671 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9672 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9673 OpcP, RegOpc(dst) ); 9674 ins_pipe( fpu_reg_mem ); 9675 %} 9676 9677 // 9678 // Cisc-alternate to reg-reg multiply 9679 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9680 predicate( UseSSE<=1 ); 9681 match(Set dst (MulD src (LoadD mem))); 9682 ins_cost(250); 9683 format %{ "FLD_D $mem\n\t" 9684 "DMUL ST,$src\n\t" 9685 "FSTP_D $dst" %} 9686 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9687 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9688 OpcReg_FPR(src), 9689 Pop_Reg_DPR(dst) ); 9690 ins_pipe( fpu_reg_reg_mem ); 9691 %} 9692 9693 9694 // MACRO3 -- addDPR a mulDPR 9695 // This instruction is a '2-address' instruction in that the result goes 9696 // back to src2. This eliminates a move from the macro; possibly the 9697 // register allocator will have to add it back (and maybe not). 9698 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9699 predicate( UseSSE<=1 ); 9700 match(Set src2 (AddD (MulD src0 src1) src2)); 9701 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9702 "DMUL ST,$src1\n\t" 9703 "DADDp $src2,ST" %} 9704 ins_cost(250); 9705 opcode(0xDD); /* LoadD DD /0 */ 9706 ins_encode( Push_Reg_FPR(src0), 9707 FMul_ST_reg(src1), 9708 FAddP_reg_ST(src2) ); 9709 ins_pipe( fpu_reg_reg_reg ); 9710 %} 9711 9712 9713 // MACRO3 -- subDPR a mulDPR 9714 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9715 predicate( UseSSE<=1 ); 9716 match(Set src2 (SubD (MulD src0 src1) src2)); 9717 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9718 "DMUL ST,$src1\n\t" 9719 "DSUBRp $src2,ST" %} 9720 ins_cost(250); 9721 ins_encode( Push_Reg_FPR(src0), 9722 FMul_ST_reg(src1), 9723 Opcode(0xDE), Opc_plus(0xE0,src2)); 9724 ins_pipe( fpu_reg_reg_reg ); 9725 %} 9726 9727 9728 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9729 predicate( UseSSE<=1 ); 9730 match(Set dst (DivD dst src)); 9731 9732 format %{ "FLD $src\n\t" 9733 "FDIVp $dst,ST" %} 9734 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9735 ins_cost(150); 9736 ins_encode( Push_Reg_DPR(src), 9737 OpcP, RegOpc(dst) ); 9738 ins_pipe( fpu_reg_reg ); 9739 %} 9740 9741 // Strict FP instruction biases argument before division then 9742 // biases 
result, to avoid double rounding of subnormals. 9743 // 9744 // scale dividend by multiplying dividend by 2^(-15360) 9745 // load divisor 9746 // divide scaled dividend by divisor 9747 // rescale quotient by 2^(15360) 9748 // 9749 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9750 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); // strict FP methods only; non-strict divides use divDPR_reg 9751 match(Set dst (DivD dst src)); 9752 ins_cost(01); 9754 9755 format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t" 9756 "DMULp  $dst,ST\n\t" 9757 "FLD    $src\n\t" 9758 "FDIVp  $dst,ST\n\t" 9759 "FLD    StubRoutines::_fpu_subnormal_bias2\n\t" 9760 "DMULp  $dst,ST\n\t" %} 9761 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9762 ins_encode( strictfp_bias1(dst), 9763 Push_Reg_DPR(src), 9764 OpcP, RegOpc(dst), 9765 strictfp_bias2(dst) ); 9766 ins_pipe( fpu_reg_reg ); 9767 %} 9768 9769 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9770 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 9771 match(Set dst (RoundDouble (DivD src1 src2))); 9772 9773 format %{ "FLD    $src1\n\t" 9774 "FDIV   ST,$src2\n\t" 9775 "FSTP_D $dst\t# D-round" %} 9776 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 9777 ins_encode( Push_Reg_DPR(src1), 9778 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); 9779 ins_pipe( fpu_mem_reg_reg ); 9780 %} 9781 9782 9783 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 9784 predicate(UseSSE<=1); 9785 match(Set dst (ModD dst src)); 9786 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 9787 9788 format %{ "DMOD   $dst,$src" %} 9789 ins_cost(250); 9790 ins_encode(Push_Reg_Mod_DPR(dst, src), 9791 emitModDPR(), 9792 Push_Result_Mod_DPR(src), 9793 Pop_Reg_DPR(dst)); 9794 ins_pipe( pipe_slow ); 9795 %} 9796 9797 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 9798 predicate(UseSSE>=2); 9799 match(Set dst (ModD src0 src1))
9800 effect(KILL rax, KILL cr); 9801 9802 format %{ "SUB ESP,8\t # DMOD\n" 9803 "\tMOVSD [ESP+0],$src1\n" 9804 "\tFLD_D [ESP+0]\n" 9805 "\tMOVSD [ESP+0],$src0\n" 9806 "\tFLD_D [ESP+0]\n" 9807 "loop:\tFPREM\n" 9808 "\tFWAIT\n" 9809 "\tFNSTSW AX\n" 9810 "\tSAHF\n" 9811 "\tJP loop\n" 9812 "\tFSTP_D [ESP+0]\n" 9813 "\tMOVSD $dst,[ESP+0]\n" 9814 "\tADD ESP,8\n" 9815 "\tFSTP ST0\t # Restore FPU Stack" 9816 %} 9817 ins_cost(250); 9818 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9819 ins_pipe( pipe_slow ); 9820 %} 9821 9822 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ 9823 predicate (UseSSE<=1); 9824 match(Set dst(TanD src)); 9825 format %{ "DTAN $dst" %} 9826 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan 9827 Opcode(0xDD), Opcode(0xD8)); // fstp st 9828 ins_pipe( pipe_slow ); 9829 %} 9830 9831 instruct tanD_reg(regD dst, eFlagsReg cr) %{ 9832 predicate (UseSSE>=2); 9833 match(Set dst(TanD dst)); 9834 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9835 format %{ "DTAN $dst" %} 9836 ins_encode( Push_SrcD(dst), 9837 Opcode(0xD9), Opcode(0xF2), // fptan 9838 Opcode(0xDD), Opcode(0xD8), // fstp st 9839 Push_ResultD(dst) ); 9840 ins_pipe( pipe_slow ); 9841 %} 9842 9843 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9844 predicate (UseSSE<=1); 9845 match(Set dst(AtanD dst src)); 9846 format %{ "DATA $dst,$src" %} 9847 opcode(0xD9, 0xF3); 9848 ins_encode( Push_Reg_DPR(src), 9849 OpcP, OpcS, RegOpc(dst) ); 9850 ins_pipe( pipe_slow ); 9851 %} 9852 9853 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9854 predicate (UseSSE>=2); 9855 match(Set dst(AtanD dst src)); 9856 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9857 format %{ "DATA $dst,$src" %} 9858 opcode(0xD9, 0xF3); 9859 ins_encode( Push_SrcD(src), 9860 OpcP, OpcS, Push_ResultD(dst) ); 9861 ins_pipe( pipe_slow ); 9862 %} 9863 9864 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9865 predicate (UseSSE<=1); 9866 match(Set dst (SqrtD 
src)); 9867 format %{ "DSQRT $dst,$src" %} 9868 opcode(0xFA, 0xD9); 9869 ins_encode( Push_Reg_DPR(src), 9870 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9871 ins_pipe( pipe_slow ); 9872 %} 9873 9874 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ 9875 predicate (UseSSE<=1); 9876 // The source Double operand on FPU stack 9877 match(Set dst (Log10D src)); 9878 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9879 // fxch ; swap ST(0) with ST(1) 9880 // fyl2x ; compute log_10(2) * log_2(x) 9881 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9882 "FXCH \n\t" 9883 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9884 %} 9885 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9886 Opcode(0xD9), Opcode(0xC9), // fxch 9887 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9888 9889 ins_pipe( pipe_slow ); 9890 %} 9891 9892 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ 9893 predicate (UseSSE>=2); 9894 effect(KILL cr); 9895 match(Set dst (Log10D src)); 9896 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9897 // fyl2x ; compute log_10(2) * log_2(x) 9898 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9899 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9900 %} 9901 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9902 Push_SrcD(src), 9903 Opcode(0xD9), Opcode(0xF1), // fyl2x 9904 Push_ResultD(dst)); 9905 9906 ins_pipe( pipe_slow ); 9907 %} 9908 9909 //-------------Float Instructions------------------------------- 9910 // Float Math 9911 9912 // Code for float compare: 9913 // fcompp(); 9914 // fwait(); fnstsw_ax(); 9915 // sahf(); 9916 // movl(dst, unordered_result); 9917 // jcc(Assembler::parity, exit); 9918 // movl(dst, less_result); 9919 // jcc(Assembler::below, exit); 9920 // movl(dst, equal_result); 9921 // jcc(Assembler::equal, exit); 9922 // movl(dst, greater_result); 9923 // exit: 9924 9925 // P6 version of float compare, sets condition codes in EFLAGS 9926 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9927 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9928 
match(Set cr (CmpF src1 src2)); 9929 effect(KILL rax); 9930 ins_cost(150); 9931 format %{ "FLD $src1\n\t" 9932 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9933 "JNP exit\n\t" 9934 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 9935 "SAHF\n" 9936 "exit:\tNOP // avoid branch to branch" %} 9937 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9938 ins_encode( Push_Reg_DPR(src1), 9939 OpcP, RegOpc(src2), 9940 cmpF_P6_fixup ); 9941 ins_pipe( pipe_slow ); 9942 %} 9943 9944 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 9945 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9946 match(Set cr (CmpF src1 src2)); 9947 ins_cost(100); 9948 format %{ "FLD $src1\n\t" 9949 "FUCOMIP ST,$src2 // P6 instruction" %} 9950 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9951 ins_encode( Push_Reg_DPR(src1), 9952 OpcP, RegOpc(src2)); 9953 ins_pipe( pipe_slow ); 9954 %} 9955 9956 9957 // Compare & branch 9958 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9959 predicate(UseSSE == 0); 9960 match(Set cr (CmpF src1 src2)); 9961 effect(KILL rax); 9962 ins_cost(200); 9963 format %{ "FLD $src1\n\t" 9964 "FCOMp $src2\n\t" 9965 "FNSTSW AX\n\t" 9966 "TEST AX,0x400\n\t" 9967 "JZ,s flags\n\t" 9968 "MOV AH,1\t# unordered treat as LT\n" 9969 "flags:\tSAHF" %} 9970 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9971 ins_encode( Push_Reg_DPR(src1), 9972 OpcP, RegOpc(src2), 9973 fpu_flags); 9974 ins_pipe( pipe_slow ); 9975 %} 9976 9977 // Compare vs zero into -1,0,1 9978 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9979 predicate(UseSSE == 0); 9980 match(Set dst (CmpF3 src1 zero)); 9981 effect(KILL cr, KILL rax); 9982 ins_cost(280); 9983 format %{ "FTSTF $dst,$src1" %} 9984 opcode(0xE4, 0xD9); 9985 ins_encode( Push_Reg_DPR(src1), 9986 OpcS, OpcP, PopFPU, 9987 CmpF_Result(dst)); 9988 ins_pipe( pipe_slow ); 9989 %} 9990 9991 // Compare into -1,0,1 9992 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, 
eFlagsReg cr) %{ 9993 predicate(UseSSE == 0); 9994 match(Set dst (CmpF3 src1 src2)); 9995 effect(KILL cr, KILL rax); 9996 ins_cost(300); 9997 format %{ "FCMPF $dst,$src1,$src2" %} 9998 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9999 ins_encode( Push_Reg_DPR(src1), 10000 OpcP, RegOpc(src2), 10001 CmpF_Result(dst)); 10002 ins_pipe( pipe_slow ); 10003 %} 10004 10005 // float compare and set condition codes in EFLAGS by XMM regs 10006 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10007 predicate(UseSSE>=1); 10008 match(Set cr (CmpF src1 src2)); 10009 ins_cost(145); 10010 format %{ "UCOMISS $src1,$src2\n\t" 10011 "JNP,s exit\n\t" 10012 "PUSHF\t# saw NaN, set CF\n\t" 10013 "AND [rsp], #0xffffff2b\n\t" 10014 "POPF\n" 10015 "exit:" %} 10016 ins_encode %{ 10017 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10018 emit_cmpfp_fixup(_masm); 10019 %} 10020 ins_pipe( pipe_slow ); 10021 %} 10022 10023 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10024 predicate(UseSSE>=1); 10025 match(Set cr (CmpF src1 src2)); 10026 ins_cost(100); 10027 format %{ "UCOMISS $src1,$src2" %} 10028 ins_encode %{ 10029 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10030 %} 10031 ins_pipe( pipe_slow ); 10032 %} 10033 10034 // float compare and set condition codes in EFLAGS by XMM regs 10035 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10036 predicate(UseSSE>=1); 10037 match(Set cr (CmpF src1 (LoadF src2))); 10038 ins_cost(165); 10039 format %{ "UCOMISS $src1,$src2\n\t" 10040 "JNP,s exit\n\t" 10041 "PUSHF\t# saw NaN, set CF\n\t" 10042 "AND [rsp], #0xffffff2b\n\t" 10043 "POPF\n" 10044 "exit:" %} 10045 ins_encode %{ 10046 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10047 emit_cmpfp_fixup(_masm); 10048 %} 10049 ins_pipe( pipe_slow ); 10050 %} 10051 10052 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10053 predicate(UseSSE>=1); 10054 match(Set cr (CmpF src1 (LoadF src2))); 10055 ins_cost(100); 10056 format %{ "UCOMISS $src1,$src2" 
%} 10057 ins_encode %{ 10058 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10059 %} 10060 ins_pipe( pipe_slow ); 10061 %} 10062 10063 // Compare into -1,0,1 in XMM 10064 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10065 predicate(UseSSE>=1); 10066 match(Set dst (CmpF3 src1 src2)); 10067 effect(KILL cr); 10068 ins_cost(255); 10069 format %{ "UCOMISS $src1, $src2\n\t" 10070 "MOV $dst, #-1\n\t" 10071 "JP,s done\n\t" 10072 "JB,s done\n\t" 10073 "SETNE $dst\n\t" 10074 "MOVZB $dst, $dst\n" 10075 "done:" %} 10076 ins_encode %{ 10077 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10078 emit_cmpfp3(_masm, $dst$$Register); 10079 %} 10080 ins_pipe( pipe_slow ); 10081 %} 10082 10083 // Compare into -1,0,1 in XMM and memory 10084 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10085 predicate(UseSSE>=1); 10086 match(Set dst (CmpF3 src1 (LoadF src2))); 10087 effect(KILL cr); 10088 ins_cost(275); 10089 format %{ "UCOMISS $src1, $src2\n\t" 10090 "MOV $dst, #-1\n\t" 10091 "JP,s done\n\t" 10092 "JB,s done\n\t" 10093 "SETNE $dst\n\t" 10094 "MOVZB $dst, $dst\n" 10095 "done:" %} 10096 ins_encode %{ 10097 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10098 emit_cmpfp3(_masm, $dst$$Register); 10099 %} 10100 ins_pipe( pipe_slow ); 10101 %} 10102 10103 // Spill to obtain 24-bit precision 10104 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10105 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10106 match(Set dst (SubF src1 src2)); 10107 10108 format %{ "FSUB $dst,$src1 - $src2" %} 10109 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10110 ins_encode( Push_Reg_FPR(src1), 10111 OpcReg_FPR(src2), 10112 Pop_Mem_FPR(dst) ); 10113 ins_pipe( fpu_mem_reg_reg ); 10114 %} 10115 // 10116 // This instruction does not round to 24-bits 10117 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10118 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10119 match(Set dst (SubF 
dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
// Add two x87 registers; the sum is stored through a 32-bit stack slot
// (Pop_Mem_FPR) so it is rounded to single precision.
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// 2-address x87 add: dst = dst + src, result stays in a register.
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Float absolute value. Operands are regFPR1 (top-of-stack), so FABS
// acts directly on TOS with no load/store traffic.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Float negate on the top of the FPU stack (FCHS flips the sign bit).
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
// Both addends come from memory; the result is spilled to a stack slot.
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
// Add a register and a float constant fetched from the constant table.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
// Fused multiply-add pattern: src2 = (src0 * src1) + src2, with the
// result written back into src2 (2-address form).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
// Fused pattern: dst = (src2 - src1) / src3 in one x87 sequence.
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
// Divide; result stored through a 32-bit stack slot to round to single.
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// 2-address divide: dst = dst / src, result stays in a register.
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
// Float remainder through the shared x87 helper; result is spilled to a
// stack slot so it is rounded to single precision.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
// 2-address remainder: dst = dst % src, result kept in a register.
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: bounce both XMM operands through the stack to the
// x87 unit (the FPREM loop in emitModDPR), then move the result back
// into an XMM register and pop the leftover FPU stack entry.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!

// Round an x87 float register to single precision by storing it to a
// 32-bit stack slot.
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round an x87 double register to double precision by storing it to a
// 64-bit stack slot.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// x87 source, XMM destination: store through the stack to round, then
// load the rounded value into the XMM register.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      // Source is not on top of the FPU stack: load it first, then
      // pop-store; otherwise a plain store from TOS suffices.
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Float-to-double widening, x87 register to register.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// XMM float source, x87 double destination: pass the value through the
// stack into the FPU.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// x87 double -> int with Java semantics: truncate toward zero; the
// 0x80000000 sentinel left by FIST signals overflow/NaN and routes to
// the d2i_wrapper slow path.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    // CVTTSD2SI produces 0x80000000 on overflow/NaN; only then take the
    // slow path through the runtime wrapper.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 double -> long; the 64-bit result comes back in EDX:EAX.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    // Switch to truncating rounding for Java conversion semantics.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 in EDX:EAX marks overflow/NaN -> slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
// SSE float -> int: fast path is a single CVTTSS2SI; the 0x80000000
// sentinel (overflow/NaN) diverts to the runtime wrapper.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    // Sentinel check: only overflow/NaN results reach the wrapper call.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 float -> long; the 64-bit result comes back in EDX:EAX.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
// SSE float -> long: move the XMM value through the stack onto the x87
// stack, FIST with truncating rounding, result in EDX:EAX; the
// 0x8000000000000000 sentinel diverts to the d2l_wrapper slow path.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // Truncating rounding mode for Java conversion semantics.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 in EDX:EAX marks overflow/NaN -> slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// int (in a stack slot) -> x87 double via FILD.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// int -> double staying entirely in XMM registers (UseXmmI2D variant).
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// int loaded directly from memory -> x87 double.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// ConvI2F of a value already masked to 8 bits (x & 255): the conversion
// is exact, so no 24-bit rounding spill is required even in 24-bit mode.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// int -> float staying entirely in XMM registers (UseXmmI2F variant).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy the int into both halves, then an
// arithmetic right shift of the high half by 31 replicates the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// long -> x87 double: push both 32-bit halves and FILD the 64-bit int.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> SSE2 double: convert on the FPU stack, move the result back
// to the XMM register via memory.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// long -> SSE float: convert on the FPU stack, move the result back to
// the XMM register via memory.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// long -> x87 float, spilled to a 32-bit stack slot for single rounding.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> int: just copy the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Reinterpret float bits in a stack slot as an int register (bit move,
// no conversion).
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Reinterpret an x87 float register as int bits in a stack slot.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# 
0x5);   /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Truncate long to int: just copy the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Reinterpret float bits as int: a plain 32-bit load from the stack slot.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// MoveF2I via x87 store to a stack slot (no SSE).
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// MoveF2I via MOVSS store to a stack slot (UseSSE>=1).
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// MoveF2I register-to-register via MOVD (UseSSE>=2) -- no memory round-trip.
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret int bits as float by storing to the destination stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// MoveI2F via x87 load from a stack slot (no SSE).
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);      /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// MoveI2F via MOVSS load from a stack slot (UseSSE>=1).
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// MoveI2F register-to-register via MOVD (UseSSE>=2) -- no memory round-trip.
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret double bits as long: two 32-bit loads (low word, then high
// word at offset +4) into the long register pair.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// MoveD2L via x87 store to a stack slot (UseSSE<=1).
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11264 ins_pipe( fpu_mem_reg ); 11265 %} 11266 11267 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11268 predicate(UseSSE>=2); 11269 match(Set dst (MoveD2L src)); 11270 effect(DEF dst, USE src); 11271 ins_cost(95); 11272 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11273 ins_encode %{ 11274 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11275 %} 11276 ins_pipe( pipe_slow ); 11277 %} 11278 11279 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11280 predicate(UseSSE>=2); 11281 match(Set dst (MoveD2L src)); 11282 effect(DEF dst, USE src, TEMP tmp); 11283 ins_cost(85); 11284 format %{ "MOVD $dst.lo,$src\n\t" 11285 "PSHUFLW $tmp,$src,0x4E\n\t" 11286 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11287 ins_encode %{ 11288 __ movdl($dst$$Register, $src$$XMMRegister); 11289 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11290 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11291 %} 11292 ins_pipe( pipe_slow ); 11293 %} 11294 11295 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11296 match(Set dst (MoveL2D src)); 11297 effect(DEF dst, USE src); 11298 11299 ins_cost(200); 11300 format %{ "MOV $dst,$src.lo\n\t" 11301 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11302 opcode(0x89, 0x89); 11303 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11304 ins_pipe( ialu_mem_long_reg ); 11305 %} 11306 11307 11308 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11309 predicate(UseSSE<=1); 11310 match(Set dst (MoveL2D src)); 11311 effect(DEF dst, USE src); 11312 ins_cost(125); 11313 11314 format %{ "FLD_D $src\n\t" 11315 "FSTP $dst\t# MoveL2D_stack_reg" %} 11316 opcode(0xDD); /* DD /0, FLD m64real */ 11317 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11318 Pop_Reg_DPR(dst) ); 11319 ins_pipe( fpu_reg_mem ); 11320 %} 11321 11322 11323 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11324 predicate(UseSSE>=2 && 
UseXmmLoadAndClearUpper); 11325 match(Set dst (MoveL2D src)); 11326 effect(DEF dst, USE src); 11327 11328 ins_cost(95); 11329 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11330 ins_encode %{ 11331 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11332 %} 11333 ins_pipe( pipe_slow ); 11334 %} 11335 11336 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11337 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11338 match(Set dst (MoveL2D src)); 11339 effect(DEF dst, USE src); 11340 11341 ins_cost(95); 11342 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11343 ins_encode %{ 11344 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11345 %} 11346 ins_pipe( pipe_slow ); 11347 %} 11348 11349 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11350 predicate(UseSSE>=2); 11351 match(Set dst (MoveL2D src)); 11352 effect(TEMP dst, USE src, TEMP tmp); 11353 ins_cost(85); 11354 format %{ "MOVD $dst,$src.lo\n\t" 11355 "MOVD $tmp,$src.hi\n\t" 11356 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11357 ins_encode %{ 11358 __ movdl($dst$$XMMRegister, $src$$Register); 11359 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11360 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11361 %} 11362 ins_pipe( pipe_slow ); 11363 %} 11364 11365 11366 // ======================================================================= 11367 // fast clearing of an array 11368 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11369 predicate(!UseFastStosb && !((ClearArrayNode*)n)->is_large()); 11370 match(Set dummy (ClearArray cnt base)); 11371 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11372 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11373 "SHL ECX,1\t# Convert doublewords to words\n\t" 11374 "REP STOS\t# store EAX into [EDI++] while ECX--" %} 11375 ins_encode %{ 11376 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false); 11377 %} 11378 ins_pipe( pipe_slow 
); 11379 %} 11380 11381 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11382 predicate(!UseFastStosb && ((ClearArrayNode*)n)->is_large()); 11383 match(Set dummy (ClearArray cnt base)); 11384 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11385 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11386 "SHL ECX,1\t# Convert doublewords to words\n\t" 11387 "REP STOS\t# store EAX into [EDI++] while ECX--" %} 11388 ins_encode %{ 11389 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true); 11390 %} 11391 ins_pipe( pipe_slow ); 11392 %} 11393 11394 instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11395 predicate(UseFastStosb && !((ClearArrayNode*)n)->is_large()); 11396 match(Set dummy (ClearArray cnt base)); 11397 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11398 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11399 "SHL ECX,3\t# Convert doublewords to bytes\n\t" 11400 "REP STOSB\t# store EAX into [EDI++] while ECX--" %} 11401 ins_encode %{ 11402 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false); 11403 %} 11404 ins_pipe( pipe_slow ); 11405 %} 11406 11407 instruct rep_fast_stosb_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11408 predicate(UseFastStosb && ((ClearArrayNode*)n)->is_large()); 11409 match(Set dummy (ClearArray cnt base)); 11410 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11411 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11412 "SHL ECX,3\t# Convert doublewords to bytes\n\t" 11413 "REP STOSB\t# store EAX into [EDI++] while ECX--" %} 11414 ins_encode %{ 11415 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true); 11416 %} 11417 ins_pipe( pipe_slow ); 11418 %} 11419 11420 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11421 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11422 predicate(((StrCompNode*)n)->encoding() == 
StrIntrinsicNode::LL); 11423 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11424 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11425 11426 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11427 ins_encode %{ 11428 __ string_compare($str1$$Register, $str2$$Register, 11429 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11430 $tmp1$$XMMRegister, StrIntrinsicNode::LL); 11431 %} 11432 ins_pipe( pipe_slow ); 11433 %} 11434 11435 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11436 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11437 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11438 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11439 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11440 11441 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11442 ins_encode %{ 11443 __ string_compare($str1$$Register, $str2$$Register, 11444 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11445 $tmp1$$XMMRegister, StrIntrinsicNode::UU); 11446 %} 11447 ins_pipe( pipe_slow ); 11448 %} 11449 11450 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11451 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11452 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11453 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11454 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11455 11456 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11457 ins_encode %{ 11458 __ string_compare($str1$$Register, $str2$$Register, 11459 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11460 $tmp1$$XMMRegister, StrIntrinsicNode::LU); 11461 %} 11462 ins_pipe( pipe_slow ); 11463 %} 11464 11465 instruct string_compareUL(eSIRegP str1, 
eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11466 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11467 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); 11468 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11469 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11470 11471 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11472 ins_encode %{ 11473 __ string_compare($str2$$Register, $str1$$Register, 11474 $cnt2$$Register, $cnt1$$Register, $result$$Register, 11475 $tmp1$$XMMRegister, StrIntrinsicNode::UL); 11476 %} 11477 ins_pipe( pipe_slow ); 11478 %} 11479 11480 // fast string equals 11481 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11482 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11483 match(Set result (StrEquals (Binary str1 str2) cnt)); 11484 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11485 11486 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11487 ins_encode %{ 11488 __ arrays_equals(false, $str1$$Register, $str2$$Register, 11489 $cnt$$Register, $result$$Register, $tmp3$$Register, 11490 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */); 11491 %} 11492 11493 ins_pipe( pipe_slow ); 11494 %} 11495 11496 // fast search of substring with known size. 
11497 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11498 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11499 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11500 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11501 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11502 11503 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11504 ins_encode %{ 11505 int icnt2 = (int)$int_cnt2$$constant; 11506 if (icnt2 >= 16) { 11507 // IndexOf for constant substrings with size >= 16 elements 11508 // which don't need to be loaded through stack. 11509 __ string_indexofC8($str1$$Register, $str2$$Register, 11510 $cnt1$$Register, $cnt2$$Register, 11511 icnt2, $result$$Register, 11512 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11513 } else { 11514 // Small strings are loaded through stack if they cross page boundary. 11515 __ string_indexof($str1$$Register, $str2$$Register, 11516 $cnt1$$Register, $cnt2$$Register, 11517 icnt2, $result$$Register, 11518 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11519 } 11520 %} 11521 ins_pipe( pipe_slow ); 11522 %} 11523 11524 // fast search of substring with known size. 
11525 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11526 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11527 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11528 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11529 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11530 11531 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11532 ins_encode %{ 11533 int icnt2 = (int)$int_cnt2$$constant; 11534 if (icnt2 >= 8) { 11535 // IndexOf for constant substrings with size >= 8 elements 11536 // which don't need to be loaded through stack. 11537 __ string_indexofC8($str1$$Register, $str2$$Register, 11538 $cnt1$$Register, $cnt2$$Register, 11539 icnt2, $result$$Register, 11540 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11541 } else { 11542 // Small strings are loaded through stack if they cross page boundary. 11543 __ string_indexof($str1$$Register, $str2$$Register, 11544 $cnt1$$Register, $cnt2$$Register, 11545 icnt2, $result$$Register, 11546 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11547 } 11548 %} 11549 ins_pipe( pipe_slow ); 11550 %} 11551 11552 // fast search of substring with known size. 
11553 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11554 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11555 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11556 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11557 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11558 11559 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11560 ins_encode %{ 11561 int icnt2 = (int)$int_cnt2$$constant; 11562 if (icnt2 >= 8) { 11563 // IndexOf for constant substrings with size >= 8 elements 11564 // which don't need to be loaded through stack. 11565 __ string_indexofC8($str1$$Register, $str2$$Register, 11566 $cnt1$$Register, $cnt2$$Register, 11567 icnt2, $result$$Register, 11568 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11569 } else { 11570 // Small strings are loaded through stack if they cross page boundary. 
11571 __ string_indexof($str1$$Register, $str2$$Register, 11572 $cnt1$$Register, $cnt2$$Register, 11573 icnt2, $result$$Register, 11574 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11575 } 11576 %} 11577 ins_pipe( pipe_slow ); 11578 %} 11579 11580 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11581 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11582 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11583 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11584 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11585 11586 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11587 ins_encode %{ 11588 __ string_indexof($str1$$Register, $str2$$Register, 11589 $cnt1$$Register, $cnt2$$Register, 11590 (-1), $result$$Register, 11591 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11592 %} 11593 ins_pipe( pipe_slow ); 11594 %} 11595 11596 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11597 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11598 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11599 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11600 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11601 11602 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11603 ins_encode %{ 11604 __ string_indexof($str1$$Register, $str2$$Register, 11605 $cnt1$$Register, $cnt2$$Register, 11606 (-1), $result$$Register, 11607 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11608 %} 11609 ins_pipe( pipe_slow ); 11610 %} 11611 11612 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11613 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11614 
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11615 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11616 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11617 11618 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11619 ins_encode %{ 11620 __ string_indexof($str1$$Register, $str2$$Register, 11621 $cnt1$$Register, $cnt2$$Register, 11622 (-1), $result$$Register, 11623 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11624 %} 11625 ins_pipe( pipe_slow ); 11626 %} 11627 11628 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, 11629 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ 11630 predicate(UseSSE42Intrinsics); 11631 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); 11632 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); 11633 format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} 11634 ins_encode %{ 11635 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 11636 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 11637 %} 11638 ins_pipe( pipe_slow ); 11639 %} 11640 11641 // fast array equals 11642 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11643 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11644 %{ 11645 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 11646 match(Set result (AryEq ary1 ary2)); 11647 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11648 //ins_cost(300); 11649 11650 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11651 ins_encode %{ 11652 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11653 $tmp3$$Register, $result$$Register, $tmp4$$Register, 
11654 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */); 11655 %} 11656 ins_pipe( pipe_slow ); 11657 %} 11658 11659 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11660 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11661 %{ 11662 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 11663 match(Set result (AryEq ary1 ary2)); 11664 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11665 //ins_cost(300); 11666 11667 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11668 ins_encode %{ 11669 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11670 $tmp3$$Register, $result$$Register, $tmp4$$Register, 11671 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */); 11672 %} 11673 ins_pipe( pipe_slow ); 11674 %} 11675 11676 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result, 11677 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) 11678 %{ 11679 match(Set result (HasNegatives ary1 len)); 11680 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 11681 11682 format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11683 ins_encode %{ 11684 __ has_negatives($ary1$$Register, $len$$Register, 11685 $result$$Register, $tmp3$$Register, 11686 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11687 %} 11688 ins_pipe( pipe_slow ); 11689 %} 11690 11691 // fast char[] to byte[] compression 11692 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11693 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11694 match(Set result (StrCompressedCopy src (Binary dst len))); 11695 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11696 11697 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 11698 ins_encode %{ 11699 __ 
char_array_compress($src$$Register, $dst$$Register, $len$$Register, 11700 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11701 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11702 %} 11703 ins_pipe( pipe_slow ); 11704 %} 11705 11706 // fast byte[] to char[] inflation 11707 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 11708 regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ 11709 match(Set dummy (StrInflatedCopy src (Binary dst len))); 11710 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 11711 11712 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 11713 ins_encode %{ 11714 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, 11715 $tmp1$$XMMRegister, $tmp2$$Register); 11716 %} 11717 ins_pipe( pipe_slow ); 11718 %} 11719 11720 // encode char[] to byte[] in ISO_8859_1 11721 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 11722 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11723 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11724 match(Set result (EncodeISOArray src (Binary dst len))); 11725 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11726 11727 format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 11728 ins_encode %{ 11729 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 11730 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11731 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11732 %} 11733 ins_pipe( pipe_slow ); 11734 %} 11735 11736 11737 //----------Control Flow Instructions------------------------------------------ 11738 // Signed compare Instructions 11739 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ 11740 match(Set cr (CmpI op1 op2)); 11741 effect( DEF cr, USE op1, USE op2 ); 11742 format %{ "CMP $op1,$op2" %} 11743 opcode(0x3B); /* Opcode 3B /r */ 11744 
ins_encode( OpcP, RegReg( op1, op2) ); 11745 ins_pipe( ialu_cr_reg_reg ); 11746 %} 11747 11748 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 11749 match(Set cr (CmpI op1 op2)); 11750 effect( DEF cr, USE op1 ); 11751 format %{ "CMP $op1,$op2" %} 11752 opcode(0x81,0x07); /* Opcode 81 /7 */ 11753 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 11754 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11755 ins_pipe( ialu_cr_reg_imm ); 11756 %} 11757 11758 // Cisc-spilled version of cmpI_eReg 11759 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 11760 match(Set cr (CmpI op1 (LoadI op2))); 11761 11762 format %{ "CMP $op1,$op2" %} 11763 ins_cost(500); 11764 opcode(0x3B); /* Opcode 3B /r */ 11765 ins_encode( OpcP, RegMem( op1, op2) ); 11766 ins_pipe( ialu_cr_reg_mem ); 11767 %} 11768 11769 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{ 11770 match(Set cr (CmpI src zero)); 11771 effect( DEF cr, USE src ); 11772 11773 format %{ "TEST $src,$src" %} 11774 opcode(0x85); 11775 ins_encode( OpcP, RegReg( src, src ) ); 11776 ins_pipe( ialu_cr_reg_imm ); 11777 %} 11778 11779 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{ 11780 match(Set cr (CmpI (AndI src con) zero)); 11781 11782 format %{ "TEST $src,$con" %} 11783 opcode(0xF7,0x00); 11784 ins_encode( OpcP, RegOpc(src), Con32(con) ); 11785 ins_pipe( ialu_cr_reg_imm ); 11786 %} 11787 11788 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{ 11789 match(Set cr (CmpI (AndI src mem) zero)); 11790 11791 format %{ "TEST $src,$mem" %} 11792 opcode(0x85); 11793 ins_encode( OpcP, RegMem( src, mem ) ); 11794 ins_pipe( ialu_cr_reg_mem ); 11795 %} 11796 11797 // Unsigned compare Instructions; really, same as signed except they 11798 // produce an eFlagsRegU instead of eFlagsReg. 
11799 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ 11800 match(Set cr (CmpU op1 op2)); 11801 11802 format %{ "CMPu $op1,$op2" %} 11803 opcode(0x3B); /* Opcode 3B /r */ 11804 ins_encode( OpcP, RegReg( op1, op2) ); 11805 ins_pipe( ialu_cr_reg_reg ); 11806 %} 11807 11808 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ 11809 match(Set cr (CmpU op1 op2)); 11810 11811 format %{ "CMPu $op1,$op2" %} 11812 opcode(0x81,0x07); /* Opcode 81 /7 */ 11813 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11814 ins_pipe( ialu_cr_reg_imm ); 11815 %} 11816 11817 // // Cisc-spilled version of cmpU_eReg 11818 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ 11819 match(Set cr (CmpU op1 (LoadI op2))); 11820 11821 format %{ "CMPu $op1,$op2" %} 11822 ins_cost(500); 11823 opcode(0x3B); /* Opcode 3B /r */ 11824 ins_encode( OpcP, RegMem( op1, op2) ); 11825 ins_pipe( ialu_cr_reg_mem ); 11826 %} 11827 11828 // // Cisc-spilled version of cmpU_eReg 11829 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ 11830 // match(Set cr (CmpU (LoadI op1) op2)); 11831 // 11832 // format %{ "CMPu $op1,$op2" %} 11833 // ins_cost(500); 11834 // opcode(0x39); /* Opcode 39 /r */ 11835 // ins_encode( OpcP, RegMem( op1, op2) ); 11836 //%} 11837 11838 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{ 11839 match(Set cr (CmpU src zero)); 11840 11841 format %{ "TESTu $src,$src" %} 11842 opcode(0x85); 11843 ins_encode( OpcP, RegReg( src, src ) ); 11844 ins_pipe( ialu_cr_reg_imm ); 11845 %} 11846 11847 // Unsigned pointer compare Instructions 11848 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{ 11849 match(Set cr (CmpP op1 op2)); 11850 11851 format %{ "CMPu $op1,$op2" %} 11852 opcode(0x3B); /* Opcode 3B /r */ 11853 ins_encode( OpcP, RegReg( op1, op2) ); 11854 ins_pipe( ialu_cr_reg_reg ); 11855 %} 11856 11857 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{ 11858 match(Set cr (CmpP op1 op2)); 11859 11860 format %{ 
"CMPu $op1,$op2" %} 11861 opcode(0x81,0x07); /* Opcode 81 /7 */ 11862 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11863 ins_pipe( ialu_cr_reg_imm ); 11864 %} 11865 11866 // // Cisc-spilled version of cmpP_eReg 11867 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{ 11868 match(Set cr (CmpP op1 (LoadP op2))); 11869 11870 format %{ "CMPu $op1,$op2" %} 11871 ins_cost(500); 11872 opcode(0x3B); /* Opcode 3B /r */ 11873 ins_encode( OpcP, RegMem( op1, op2) ); 11874 ins_pipe( ialu_cr_reg_mem ); 11875 %} 11876 11877 // // Cisc-spilled version of cmpP_eReg 11878 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{ 11879 // match(Set cr (CmpP (LoadP op1) op2)); 11880 // 11881 // format %{ "CMPu $op1,$op2" %} 11882 // ins_cost(500); 11883 // opcode(0x39); /* Opcode 39 /r */ 11884 // ins_encode( OpcP, RegMem( op1, op2) ); 11885 //%} 11886 11887 // Compare raw pointer (used in out-of-heap check). 11888 // Only works because non-oop pointers must be raw pointers 11889 // and raw pointers have no anti-dependencies. 11890 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{ 11891 predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none ); 11892 match(Set cr (CmpP op1 (LoadP op2))); 11893 11894 format %{ "CMPu $op1,$op2" %} 11895 opcode(0x3B); /* Opcode 3B /r */ 11896 ins_encode( OpcP, RegMem( op1, op2) ); 11897 ins_pipe( ialu_cr_reg_mem ); 11898 %} 11899 11900 // 11901 // This will generate a signed flags result. This should be ok 11902 // since any compare to a zero should be eq/neq. 11903 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{ 11904 match(Set cr (CmpP src zero)); 11905 11906 format %{ "TEST $src,$src" %} 11907 opcode(0x85); 11908 ins_encode( OpcP, RegReg( src, src ) ); 11909 ins_pipe( ialu_cr_reg_imm ); 11910 %} 11911 11912 // Cisc-spilled version of testP_reg 11913 // This will generate a signed flags result. This should be ok 11914 // since any compare to a zero should be eq/neq. 
11915 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{ 11916 match(Set cr (CmpP (LoadP op) zero)); 11917 11918 format %{ "TEST $op,0xFFFFFFFF" %} 11919 ins_cost(500); 11920 opcode(0xF7); /* Opcode F7 /0 */ 11921 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); 11922 ins_pipe( ialu_cr_reg_imm ); 11923 %} 11924 11925 // Yanked all unsigned pointer compare operations. 11926 // Pointer compares are done with CmpP which is already unsigned. 11927 11928 //----------Max and Min-------------------------------------------------------- 11929 // Min Instructions 11930 //// 11931 // *** Min and Max using the conditional move are slower than the 11932 // *** branch version on a Pentium III. 11933 // // Conditional move for min 11934 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11935 // effect( USE_DEF op2, USE op1, USE cr ); 11936 // format %{ "CMOVlt $op2,$op1\t! min" %} 11937 // opcode(0x4C,0x0F); 11938 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11939 // ins_pipe( pipe_cmov_reg ); 11940 //%} 11941 // 11942 //// Min Register with Register (P6 version) 11943 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11944 // predicate(VM_Version::supports_cmov() ); 11945 // match(Set op2 (MinI op1 op2)); 11946 // ins_cost(200); 11947 // expand %{ 11948 // eFlagsReg cr; 11949 // compI_eReg(cr,op1,op2); 11950 // cmovI_reg_lt(op2,op1,cr); 11951 // %} 11952 //%} 11953 11954 // Min Register with Register (generic version) 11955 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11956 match(Set dst (MinI dst src)); 11957 effect(KILL flags); 11958 ins_cost(300); 11959 11960 format %{ "MIN $dst,$src" %} 11961 opcode(0xCC); 11962 ins_encode( min_enc(dst,src) ); 11963 ins_pipe( pipe_slow ); 11964 %} 11965 11966 // Max Register with Register 11967 // *** Min and Max using the conditional move are slower than the 11968 // *** branch version on a Pentium III. 
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
// opcode 0xCC is a placeholder; the real code comes from max_enc.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
// Computes limit = init + stride * ((limit - init + stride - 1) / stride)
// in 64-bit arithmetic on EAX:EDX (stride is a compile-time constant).
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    // stride == +/-1 is strength-reduced elsewhere; the division below
    // assumes |strd| >= 2.
    assert(strd != 1 && strd != -1, "sanity");
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Float-compare branch that must also account for the parity flag
// (unordered result); emits an extra JP before/around the main Jcc.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
       ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
// Slow-path subtype check producing an integer result in EDI
// (zero = hit, non-zero = miss); uses REPNE SCASD over the
// secondary-supers array.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Same check, but consumed directly by a compare against zero, so only
// the flags result is needed and EDI need not be cleared on a hit.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short-branch variant of jmpConUCF2: parity (unordered) handled with
// an extra short jump; total size is two 2-byte jccb's.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
       ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in dst from a 64-bit signed compare:
// high halves compared signed, low halves compared unsigned.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less,    m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below,   m_one);
    __ jccb(Assembler::equal,   done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// The sign of a long vs zero lives entirely in the high word.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// 64-bit compare via CMP-low / SBB-high; only the sign/overflow flags
// (LT/GE tests) are meaningful afterwards.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE pointers.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the UseSSE term is parenthesized together with the BoolTest
// alternatives; without the parentheses '&&' binds tighter than '||'
// and the 'ge' arm would ignore the UseSSE guard (matching the style
// already used by the integer/pointer cmov predicates above).
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Conditional move of a long (both halves) based on an EQ/NE long compare.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant of cmovLL_reg_EQNE.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Conditional move of an int based on an EQ/NE long compare.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source variant of cmovII_reg_EQNE.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE pointers.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the UseSSE term is parenthesized together with the BoolTest
// alternatives; without the parentheses '&&' binds tighter than '||'
// and the 'ne' arm would ignore the UseSSE guard (matching the style
// already used by the integer/pointer cmov predicates above).
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant of cmovLL_reg_LEGT.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Integer conditional move keyed off a long comparison (LE/GT tests only).
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above but the source operand is loaded from memory.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Pointer conditional move keyed off a long comparison (LE/GT tests only).
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 register form).
// Fix in this and the three FP rules below: '&&' binds tighter than '||' in
// C++, so the BoolTest disjunction must be parenthesized or the UseSSE guard
// does not cover the BoolTest::gt arm. The cmovLL/II/PP rules above already
// parenthesize this disjunction; the FP rules now use the same form.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form, requires SSE2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 register form, no SSE).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM form, requires SSE).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // Loads EAX with a sentinel before the call, per the format string.
  // NOTE(review): exact sentinel/inline-cache semantics live in the
  // Java_Dynamic_Call encoding — confirm there.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call variant without the float-stack bookkeeping of CallLeafDirect.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // The return address is discarded into EDX before the indirect jump.
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast-lock, RTM flavor: selected when the compile uses restricted
// transactional memory; needs two extra temps (cx1, cx2).
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Fast-lock, non-RTM flavor (complementary predicate to the rule above).
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

// Fast-unlock; the single rule handles both RTM and non-RTM, dispatching
// on use_rtm() inside the macro-assembler helper.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or. By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$. Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic. It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceeded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active rule: a load that reads back a value just stored to the same
// memory slot is replaced by the store alone.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.