//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Writes the (lo, hi) 64-bit pair into the 16-byte-aligned slot at or below
// 'adr' and returns the aligned address, so SSE instructions can use it as a
// 128-bit memory operand.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Note: each pool pointer aliases a 16-byte-aligned slot carved out of
// fp_signmask_pool; the extra 128 bits absorb the alignment adjustment.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call for per-call resets:
// an optional FLDCW (6 bytes) when the method runs in 24-bit FP mode, and an
// optional VZEROUPPER (3 bytes) when wide vectors are in use.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  // 10 = 5 bytes (MOV of the inline-cache value) + 5 bytes (call).
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

// Size in bytes of the FFree_Float_Stack_All stub prefix emitted before a
// runtime call; recorded at emission time, -1 until then.
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModRM (or SIB) byte assembled from its three bit-fields:
// f1 -> bits 7:6, f2 -> bits 5:3, f3 -> bits 2:0.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a base opcode with a condition code.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Embedded oops must not be scavengable unless ScavengeRootsInCode allows it.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + ModRM/SIB addressing [ESP+disp], using the short 8-bit
// displacement form when disp fits in a signed byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModRM (and, when needed, SIB) bytes plus displacement for a
// register/memory operand. index == 0x4 means "no index"; base == -1 (and
// base == EBP_enc with mode 0) select the absolute-address encodings.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Emit a 32-bit register-to-register MOV (0x8B); a self-move emits nothing.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// After a comiss/ucomiss, rewrite the flags so a NaN (parity set) compares
// as 'less than' instead of 'unordered'.
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst':
// -1 for less-than or NaN, 0 for equal, 1 for greater-than.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog that MachPrologNode::emit produces; for debug
// output only, emits no code.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH   EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB    ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB    ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD    EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW  \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog; must track MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW  standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL   EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST   PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc);  // popl rbp

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // test eax, [polling_page] -- safepoint poll on return
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

// Upper bound on the byte size of the code produced by emit() above;
// the two must be kept in sync when either changes.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  size += 64; // added to support ReservedStackAccess
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Register class used for spill-copy decisions.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (or format, or just size) a single [ESP+offset] load/store for a
// spill copy, and return the accumulated instruction size.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else {                      // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Emit/print/size one XMM <-> stack spill move.  A register pair
// (reg_lo+1 == reg_hi) denotes a 64-bit double, otherwise a 32-bit float.
// Returns the accumulated encoding size in bytes.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  // Operand size and W-bit feed the EVEX compressed-displacement query below.
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // With AVX-512 the displacement may be compressed to one byte even when
  // it exceeds 127, so ask the assembler; otherwise use the plain disp8 rule.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// Emit/print/size one XMM -> XMM register move (float or double, depending
// on whether both operands are register pairs).  Returns accumulated size.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Emit/print/size a 32-bit GPR -> XMM move (MOVD).
// NOTE(review): unlike the helpers above, the accumulated 'size' parameter is
// not folded into the return value; callers currently reach this with size==0.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// Emit/print/size an XMM -> 32-bit GPR move (MOVD).
// NOTE(review): as above, 'size' is not added to the returned byte count.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Emit/print/size a 32-bit GPR -> GPR move (MOV r32,r/m32, opcode 0x8B).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(cbuf ? *cbuf : *cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2; // opcode + ModRM
}

// Emit/print/size an x87 -> stack store.  If the source is not already on
// top of the FP stack it is pushed first (FLD), then stored with a popping
// FSTP; otherwise a non-popping FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // st_op abuses register numbers as the ModRM reg-field opcode extension:
  // /3 (EBX_num) = store-and-pop, /2 (EDX_num) = store-no-pop.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
956 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 957 int src_hi, int dst_hi, uint ireg, outputStream* st); 958 959 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 960 int stack_offset, int reg, uint ireg, outputStream* st); 961 962 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 963 int dst_offset, uint ireg, outputStream* st) { 964 int calc_size = 0; 965 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 966 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 967 switch (ireg) { 968 case Op_VecS: 969 calc_size = 3+src_offset_size + 3+dst_offset_size; 970 break; 971 case Op_VecD: { 972 calc_size = 3+src_offset_size + 3+dst_offset_size; 973 int tmp_src_offset = src_offset + 4; 974 int tmp_dst_offset = dst_offset + 4; 975 src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4); 976 dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 
1 : 4); 977 calc_size += 3+src_offset_size + 3+dst_offset_size; 978 break; 979 } 980 case Op_VecX: 981 case Op_VecY: 982 case Op_VecZ: 983 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 984 break; 985 default: 986 ShouldNotReachHere(); 987 } 988 if (cbuf) { 989 MacroAssembler _masm(cbuf); 990 int offset = __ offset(); 991 switch (ireg) { 992 case Op_VecS: 993 __ pushl(Address(rsp, src_offset)); 994 __ popl (Address(rsp, dst_offset)); 995 break; 996 case Op_VecD: 997 __ pushl(Address(rsp, src_offset)); 998 __ popl (Address(rsp, dst_offset)); 999 __ pushl(Address(rsp, src_offset+4)); 1000 __ popl (Address(rsp, dst_offset+4)); 1001 break; 1002 case Op_VecX: 1003 __ movdqu(Address(rsp, -16), xmm0); 1004 __ movdqu(xmm0, Address(rsp, src_offset)); 1005 __ movdqu(Address(rsp, dst_offset), xmm0); 1006 __ movdqu(xmm0, Address(rsp, -16)); 1007 break; 1008 case Op_VecY: 1009 __ vmovdqu(Address(rsp, -32), xmm0); 1010 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1011 __ vmovdqu(Address(rsp, dst_offset), xmm0); 1012 __ vmovdqu(xmm0, Address(rsp, -32)); 1013 break; 1014 case Op_VecZ: 1015 __ evmovdqul(Address(rsp, -64), xmm0, 2); 1016 __ evmovdqul(xmm0, Address(rsp, src_offset), 2); 1017 __ evmovdqul(Address(rsp, dst_offset), xmm0, 2); 1018 __ evmovdqul(xmm0, Address(rsp, -64), 2); 1019 break; 1020 default: 1021 ShouldNotReachHere(); 1022 } 1023 int size = __ offset() - offset; 1024 assert(size == calc_size, "incorrect size calculation"); 1025 return size; 1026 #ifndef PRODUCT 1027 } else if (!do_size) { 1028 switch (ireg) { 1029 case Op_VecS: 1030 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1031 "popl [rsp + #%d]", 1032 src_offset, dst_offset); 1033 break; 1034 case Op_VecD: 1035 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1036 "popq [rsp + #%d]\n\t" 1037 "pushl [rsp + #%d]\n\t" 1038 "popq [rsp + #%d]", 1039 src_offset, dst_offset, src_offset+4, dst_offset+4); 1040 break; 1041 case Op_VecX: 1042 st->print("movdqu [rsp - #16], xmm0\t# 
128-bit mem-mem spill\n\t" 1043 "movdqu xmm0, [rsp + #%d]\n\t" 1044 "movdqu [rsp + #%d], xmm0\n\t" 1045 "movdqu xmm0, [rsp - #16]", 1046 src_offset, dst_offset); 1047 break; 1048 case Op_VecY: 1049 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1050 "vmovdqu xmm0, [rsp + #%d]\n\t" 1051 "vmovdqu [rsp + #%d], xmm0\n\t" 1052 "vmovdqu xmm0, [rsp - #32]", 1053 src_offset, dst_offset); 1054 break; 1055 case Op_VecZ: 1056 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1057 "vmovdqu xmm0, [rsp + #%d]\n\t" 1058 "vmovdqu [rsp + #%d], xmm0\n\t" 1059 "vmovdqu xmm0, [rsp - #64]", 1060 src_offset, dst_offset); 1061 break; 1062 default: 1063 ShouldNotReachHere(); 1064 } 1065 #endif 1066 } 1067 return calc_size; 1068 } 1069 1070 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1071 // Get registers to move 1072 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1073 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1074 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1075 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1076 1077 enum RC src_second_rc = rc_class(src_second); 1078 enum RC src_first_rc = rc_class(src_first); 1079 enum RC dst_second_rc = rc_class(dst_second); 1080 enum RC dst_first_rc = rc_class(dst_first); 1081 1082 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1083 1084 // Generate spill code! 
1085 int size = 0; 1086 1087 if( src_first == dst_first && src_second == dst_second ) 1088 return size; // Self copy, no move 1089 1090 if (bottom_type()->isa_vect() != NULL) { 1091 uint ireg = ideal_reg(); 1092 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1093 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1094 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1095 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1096 // mem -> mem 1097 int src_offset = ra_->reg2offset(src_first); 1098 int dst_offset = ra_->reg2offset(dst_first); 1099 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1100 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1101 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1102 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1103 int stack_offset = ra_->reg2offset(dst_first); 1104 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1105 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1106 int stack_offset = ra_->reg2offset(src_first); 1107 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1108 } else { 1109 ShouldNotReachHere(); 1110 } 1111 } 1112 1113 // -------------------------------------- 1114 // Check for mem-mem move. push/pop to move. 
1115 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1116 if( src_second == dst_first ) { // overlapping stack copy ranges 1117 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1118 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1119 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1120 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1121 } 1122 // move low bits 1123 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1124 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1125 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1126 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1127 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1128 } 1129 return size; 1130 } 1131 1132 // -------------------------------------- 1133 // Check for integer reg-reg copy 1134 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1135 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1136 1137 // Check for integer store 1138 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1139 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1140 1141 // Check for integer load 1142 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1143 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1144 1145 // Check for integer reg-xmm reg copy 1146 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1147 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1148 "no 64 bit integer-float reg moves" ); 1149 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1150 } 1151 // -------------------------------------- 1152 // Check for float reg-reg copy 1153 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1154 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1155 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1156 if( cbuf ) { 1157 1158 // Note the mucking with the register encode to compensate for the 0/1 1159 // indexing issue mentioned in a comment in the reg_def sections 1160 // for FPR registers many lines above here. 1161 1162 if( src_first != FPR1L_num ) { 1163 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1164 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1165 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1166 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1167 } else { 1168 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1169 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1170 } 1171 #ifndef PRODUCT 1172 } else if( !do_size ) { 1173 if( size != 0 ) st->print("\n\t"); 1174 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1175 else st->print( "FST %s", Matcher::regName[dst_first]); 1176 #endif 1177 } 1178 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1179 } 1180 1181 // Check for float store 1182 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1183 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1184 } 1185 1186 // Check for float load 1187 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1188 int offset = ra_->reg2offset(src_first); 1189 const char *op_str; 1190 int op; 1191 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1192 op_str = "FLD_D"; 1193 op = 0xDD; 1194 } else { // 32-bit load 1195 op_str = "FLD_S"; 1196 op = 0xD9; 1197 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1198 } 1199 if( cbuf ) { 1200 emit_opcode (*cbuf, op ); 1201 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1202 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1203 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1204 #ifndef PRODUCT 1205 } else if( !do_size ) { 1206 if( size != 0 ) st->print("\n\t"); 1207 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1208 #endif 1209 } 1210 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1211 return size + 3+offset_size+2; 1212 } 1213 1214 // Check for xmm reg-reg copy 1215 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1216 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1217 (src_first+1 == src_second && dst_first+1 == dst_second), 1218 "no non-adjacent float-moves" ); 1219 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1220 } 1221 1222 // Check for xmm reg-integer reg copy 1223 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1224 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1225 "no 64 bit float-integer reg moves" ); 1226 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1227 } 1228 1229 // Check for xmm store 1230 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1231 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1232 } 1233 1234 // Check for float xmm load 1235 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1236 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1237 } 1238 1239 // Copy from float reg to xmm reg 1240 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1241 
// copy to the top of stack from floating point reg 1242 // and use LEA to preserve flags 1243 if( cbuf ) { 1244 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1245 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1246 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1247 emit_d8(*cbuf,0xF8); 1248 #ifndef PRODUCT 1249 } else if( !do_size ) { 1250 if( size != 0 ) st->print("\n\t"); 1251 st->print("LEA ESP,[ESP-8]"); 1252 #endif 1253 } 1254 size += 4; 1255 1256 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1257 1258 // Copy from the temp memory to the xmm reg. 1259 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1260 1261 if( cbuf ) { 1262 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1263 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1264 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1265 emit_d8(*cbuf,0x08); 1266 #ifndef PRODUCT 1267 } else if( !do_size ) { 1268 if( size != 0 ) st->print("\n\t"); 1269 st->print("LEA ESP,[ESP+8]"); 1270 #endif 1271 } 1272 size += 4; 1273 return size; 1274 } 1275 1276 assert( size > 0, "missed a case" ); 1277 1278 // -------------------------------------------------------------------- 1279 // Check for second bits still needing moving. 
1280 if( src_second == dst_second ) 1281 return size; // Self copy; no move 1282 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1283 1284 // Check for second word int-int move 1285 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1286 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1287 1288 // Check for second word integer store 1289 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1290 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1291 1292 // Check for second word integer load 1293 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1294 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1295 1296 1297 Unimplemented(); 1298 return 0; // Mute compiler 1299 } 1300 1301 #ifndef PRODUCT 1302 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1303 implementation( NULL, ra_, false, st ); 1304 } 1305 #endif 1306 1307 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1308 implementation( &cbuf, ra_, false, NULL ); 1309 } 1310 1311 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1312 return implementation( NULL, ra_, true, NULL ); 1313 } 1314 1315 1316 //============================================================================= 1317 #ifndef PRODUCT 1318 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1319 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1320 int reg = ra_->get_reg_first(this); 1321 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1322 } 1323 #endif 1324 1325 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1326 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1327 int reg = ra_->get_encode(this); 1328 if( offset >= 128 ) { 1329 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1330 emit_rm(cbuf, 0x2, reg, 
0x04); 1331 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1332 emit_d32(cbuf, offset); 1333 } 1334 else { 1335 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1336 emit_rm(cbuf, 0x1, reg, 0x04); 1337 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1338 emit_d8(cbuf, offset); 1339 } 1340 } 1341 1342 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1343 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1344 if( offset >= 128 ) { 1345 return 7; 1346 } 1347 else { 1348 return 4; 1349 } 1350 } 1351 1352 //============================================================================= 1353 #ifndef PRODUCT 1354 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1355 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1356 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1357 st->print_cr("\tNOP"); 1358 st->print_cr("\tNOP"); 1359 if( !OptoBreakpoint ) 1360 st->print_cr("\tNOP"); 1361 } 1362 #endif 1363 1364 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1365 MacroAssembler masm(&cbuf); 1366 #ifdef ASSERT 1367 uint insts_size = cbuf.insts_size(); 1368 #endif 1369 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1370 masm.jump_cc(Assembler::notEqual, 1371 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1372 /* WARNING these NOPs are critical so that verified entry point is properly 1373 aligned for patching by NativeJump::patch_verified_entry() */ 1374 int nops_cnt = 2; 1375 if( !OptoBreakpoint ) // Leave space for int3 1376 nops_cnt += 1; 1377 masm.nop(nops_cnt); 1378 1379 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1380 } 1381 1382 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1383 return OptoBreakpoint ? 
11 : 12; 1384 } 1385 1386 1387 //============================================================================= 1388 1389 int Matcher::regnum_to_fpu_offset(int regnum) { 1390 return regnum - 32; // The FP registers are in the second chunk 1391 } 1392 1393 // This is UltraSparc specific, true just means we have fast l2f conversion 1394 const bool Matcher::convL2FSupported(void) { 1395 return true; 1396 } 1397 1398 // Is this branch offset short enough that a short branch can be used? 1399 // 1400 // NOTE: If the platform does not provide any short branch variants, then 1401 // this method should return false for offset 0. 1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1403 // The passed offset is relative to address of the branch. 1404 // On 86 a branch displacement is calculated relative to address 1405 // of a next instruction. 1406 offset -= br_size; 1407 1408 // the short version of jmpConUCF2 contains multiple branches, 1409 // making the reach slightly less 1410 if (rule == jmpConUCF2_rule) 1411 return (-126 <= offset && offset <= 125); 1412 return (-128 <= offset && offset <= 127); 1413 } 1414 1415 const bool Matcher::isSimpleConstant64(jlong value) { 1416 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1417 return false; 1418 } 1419 1420 // The ecx parameter to rep stos for the ClearArray node is in dwords. 1421 const bool Matcher::init_array_count_is_in_bytes = false; 1422 1423 // Threshold size for cleararray. 1424 const int Matcher::init_array_short_size = 8 * BytesPerLong; 1425 1426 // Needs 2 CMOV's for longs. 1427 const int Matcher::long_cmove_cost() { return 1; } 1428 1429 // No CMOVF/CMOVD with SSE/SSE2 1430 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1431 1432 // Does the CPU require late expand (see block.cpp for description of late expand)? 
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Narrow oops are a 64-bit-only concept; this must never be asked on x86_32.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

// Likewise for narrow klass pointers: 64-bit only.
bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Rewrite the memory operand of 'node' (the operand that input edge 'idx'
// feeds) into its *_win95_safe variant so an implicit null check traps in a
// way Win95 can handle.  Operand kinds that never need the fixup return early.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds(); // Virtual call for number of operands
  uint skipped = node->oper_input_base(); // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1; // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Walk operands until we find the one that covers input edge 'idx'.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++; // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL -- no divmodL node on x86_32.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL -- no divmodL node on x86_32.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    // (AndL x con) with a constant whose high word is zero.
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.  MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (16-bit operand size).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    // normal case special case
    //
    // input : rax,: dividend min_int
    // reg: divisor -1
    //
    // output: rax,: quotient (= rax, idiv reg) min_int
    // rdx: remainder (= rax, irem reg) 0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80 cmp rax,80000000h
    // 0F 85 0B 00 00 00 jne normal_case
    // 33 D2 xor rdx,edx
    // 83 F9 FF cmp rcx,0FFh
    // 0F 84 03 00 00 00 je done
    // normal_case:
    // 99 cdq
    // F7 F9 idiv rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
    // normal_case:
    emit_opcode(cbuf,0x99); // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1719 // Check for 8-bit immediate, and set sign extend bit in opcode 1720 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1721 emit_opcode(cbuf, $primary | 0x02); } 1722 else { // If 32-bit immediate 1723 emit_opcode(cbuf, $primary); 1724 } 1725 // Emit r/m byte with secondary opcode, after primary opcode. 1726 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1727 %} 1728 1729 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1730 // Check for 8-bit immediate, and set sign extend bit in opcode 1731 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1732 $$$emit8$imm$$constant; 1733 } 1734 else { // If 32-bit immediate 1735 // Output immediate 1736 $$$emit32$imm$$constant; 1737 } 1738 %} 1739 1740 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1741 // Emit primary opcode and set sign-extend bit 1742 // Check for 8-bit immediate, and set sign extend bit in opcode 1743 int con = (int)$imm$$constant; // Throw away top bits 1744 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1745 // Emit r/m byte with secondary opcode, after primary opcode. 1746 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1747 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1748 else emit_d32(cbuf,con); 1749 %} 1750 1751 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1752 // Emit primary opcode and set sign-extend bit 1753 // Check for 8-bit immediate, and set sign extend bit in opcode 1754 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1755 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1756 // Emit r/m byte with tertiary opcode, after primary opcode. 
1757 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1758 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1759 else emit_d32(cbuf,con); 1760 %} 1761 1762 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1763 emit_cc(cbuf, $secondary, $dst$$reg ); 1764 %} 1765 1766 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1767 int destlo = $dst$$reg; 1768 int desthi = HIGH_FROM_LOW(destlo); 1769 // bswap lo 1770 emit_opcode(cbuf, 0x0F); 1771 emit_cc(cbuf, 0xC8, destlo); 1772 // bswap hi 1773 emit_opcode(cbuf, 0x0F); 1774 emit_cc(cbuf, 0xC8, desthi); 1775 // xchg lo and hi 1776 emit_opcode(cbuf, 0x87); 1777 emit_rm(cbuf, 0x3, destlo, desthi); 1778 %} 1779 1780 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1781 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1782 %} 1783 1784 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1785 $$$emit8$primary; 1786 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1787 %} 1788 1789 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1790 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1791 emit_d8(cbuf, op >> 8 ); 1792 emit_d8(cbuf, op & 255); 1793 %} 1794 1795 // emulate a CMOV with a conditional branch around a MOV 1796 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1797 // Invert sense of branch from sense of CMOV 1798 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1799 emit_d8( cbuf, $brOffs$$constant ); 1800 %} 1801 1802 enc_class enc_PartialSubtypeCheck( ) %{ 1803 Register Redi = as_Register(EDI_enc); // result register 1804 Register Reax = as_Register(EAX_enc); // super class 1805 Register Recx = as_Register(ECX_enc); // killed 1806 Register Resi = as_Register(ESI_enc); // sub class 1807 Label miss; 1808 1809 MacroAssembler _masm(&cbuf); 1810 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1811 NULL, &miss, 1812 /*set_cond_codes:*/ true); 1813 if ($primary) { 1814 __ xorptr(Redi, Redi); 1815 } 1816 __ bind(miss); 1817 %} 1818 1819 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1820 
MacroAssembler masm(&cbuf); 1821 int start = masm.offset(); 1822 if (UseSSE >= 2) { 1823 if (VerifyFPU) { 1824 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1825 } 1826 } else { 1827 // External c_calling_convention expects the FPU stack to be 'clean'. 1828 // Compiled code leaves it dirty. Do cleanup now. 1829 masm.empty_FPU_stack(); 1830 } 1831 if (sizeof_FFree_Float_Stack_All == -1) { 1832 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1833 } else { 1834 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1835 } 1836 %} 1837 1838 enc_class Verify_FPU_For_Leaf %{ 1839 if( VerifyFPU ) { 1840 MacroAssembler masm(&cbuf); 1841 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1842 } 1843 %} 1844 1845 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1846 // This is the instruction starting address for relocation info. 1847 cbuf.set_insts_mark(); 1848 $$$emit8$primary; 1849 // CALL directly to the runtime 1850 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1851 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1852 1853 if (UseSSE >= 2) { 1854 MacroAssembler _masm(&cbuf); 1855 BasicType rt = tf()->return_type(); 1856 1857 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1858 // A C runtime call where the return value is unused. In SSE2+ 1859 // mode the result needs to be removed from the FPU stack. It's 1860 // likely that this function call could be removed by the 1861 // optimizer if the C function is a pure function. 
1862 __ ffree(0); 1863 } else if (rt == T_FLOAT) { 1864 __ lea(rsp, Address(rsp, -4)); 1865 __ fstp_s(Address(rsp, 0)); 1866 __ movflt(xmm0, Address(rsp, 0)); 1867 __ lea(rsp, Address(rsp, 4)); 1868 } else if (rt == T_DOUBLE) { 1869 __ lea(rsp, Address(rsp, -8)); 1870 __ fstp_d(Address(rsp, 0)); 1871 __ movdbl(xmm0, Address(rsp, 0)); 1872 __ lea(rsp, Address(rsp, 8)); 1873 } 1874 } 1875 %} 1876 1877 1878 enc_class pre_call_resets %{ 1879 // If method sets FPU control word restore it here 1880 debug_only(int off0 = cbuf.insts_size()); 1881 if (ra_->C->in_24_bit_fp_mode()) { 1882 MacroAssembler _masm(&cbuf); 1883 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1884 } 1885 if (ra_->C->max_vector_size() > 16) { 1886 // Clear upper bits of YMM registers when current compiled code uses 1887 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1888 MacroAssembler _masm(&cbuf); 1889 __ vzeroupper(); 1890 } 1891 debug_only(int off1 = cbuf.insts_size()); 1892 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1893 %} 1894 1895 enc_class post_call_FPU %{ 1896 // If method sets FPU control word do it here also 1897 if (Compile::current()->in_24_bit_fp_mode()) { 1898 MacroAssembler masm(&cbuf); 1899 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1900 } 1901 %} 1902 1903 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1904 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1905 // who we intended to call. 1906 cbuf.set_insts_mark(); 1907 $$$emit8$primary; 1908 1909 if (!_method) { 1910 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1911 runtime_call_Relocation::spec(), 1912 RELOC_IMM32); 1913 } else { 1914 int method_index = resolved_method_index(cbuf); 1915 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1916 : static_call_Relocation::spec(method_index); 1917 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1918 rspec, RELOC_DISP32); 1919 // Emit stubs for static call. 1920 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1921 if (stub == NULL) { 1922 ciEnv::current()->record_failure("CodeCache is full"); 1923 return; 1924 } 1925 } 1926 %} 1927 1928 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1929 MacroAssembler _masm(&cbuf); 1930 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1931 %} 1932 1933 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1934 int disp = in_bytes(Method::from_compiled_offset()); 1935 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1936 1937 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1938 cbuf.set_insts_mark(); 1939 $$$emit8$primary; 1940 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1941 emit_d8(cbuf, disp); // Displacement 1942 1943 %} 1944 1945 // Following encoding is no longer used, but may be restored if calling 1946 // convention changes significantly. 
1947 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1948 // 1949 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1950 // // int ic_reg = Matcher::inline_cache_reg(); 1951 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1952 // // int imo_reg = Matcher::interpreter_method_oop_reg(); 1953 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1954 // 1955 // // // Interpreter expects method_oop in EBX, currently a callee-saved register, 1956 // // // so we load it immediately before the call 1957 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop 1958 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1959 // 1960 // // xor rbp,ebp 1961 // emit_opcode(cbuf, 0x33); 1962 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1963 // 1964 // // CALL to interpreter. 1965 // cbuf.set_insts_mark(); 1966 // $$$emit8$primary; 1967 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1968 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1969 // %} 1970 1971 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1972 $$$emit8$primary; 1973 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1974 $$$emit8$shift$$constant; 1975 %} 1976 1977 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1978 // Load immediate does not have a zero or sign extended version 1979 // for 8-bit immediates 1980 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1981 $$$emit32$src$$constant; 1982 %} 1983 1984 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1985 // Load immediate does not have a zero or sign extended version 1986 // for 8-bit immediates 1987 emit_opcode(cbuf, $primary + $dst$$reg); 1988 $$$emit32$src$$constant; 1989 %} 1990 1991 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1992 // Load immediate does not have a zero or sign extended version 1993 // for 8-bit immediates 1994 int dst_enc = $dst$$reg; 1995 int src_con = $src$$constant & 0x0FFFFFFFFL; 1996 if (src_con == 0) { 1997 // xor dst, dst 
1998 emit_opcode(cbuf, 0x33); 1999 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 2000 } else { 2001 emit_opcode(cbuf, $primary + dst_enc); 2002 emit_d32(cbuf, src_con); 2003 } 2004 %} 2005 2006 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 2007 // Load immediate does not have a zero or sign extended version 2008 // for 8-bit immediates 2009 int dst_enc = $dst$$reg + 2; 2010 int src_con = ((julong)($src$$constant)) >> 32; 2011 if (src_con == 0) { 2012 // xor dst, dst 2013 emit_opcode(cbuf, 0x33); 2014 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 2015 } else { 2016 emit_opcode(cbuf, $primary + dst_enc); 2017 emit_d32(cbuf, src_con); 2018 } 2019 %} 2020 2021 2022 // Encode a reg-reg copy. If it is useless, then empty encoding. 2023 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 2024 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2025 %} 2026 2027 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 2028 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2029 %} 2030 2031 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 2032 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2033 %} 2034 2035 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 2036 $$$emit8$primary; 2037 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2038 %} 2039 2040 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 2041 $$$emit8$secondary; 2042 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2043 %} 2044 2045 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 2046 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2047 %} 2048 2049 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 2050 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2051 %} 2052 2053 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 2054 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); 2055 %} 2056 2057 enc_class Con32 (immI src) %{ // Con32(storeImmI) 2058 // Output immediate 2059 $$$emit32$src$$constant; 2060 %} 2061 2062 enc_class Con32FPR_as_bits(immFPR src) 
%{ // storeF_imm 2063 // Output Float immediate bits 2064 jfloat jf = $src$$constant; 2065 int jf_as_bits = jint_cast( jf ); 2066 emit_d32(cbuf, jf_as_bits); 2067 %} 2068 2069 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 2070 // Output Float immediate bits 2071 jfloat jf = $src$$constant; 2072 int jf_as_bits = jint_cast( jf ); 2073 emit_d32(cbuf, jf_as_bits); 2074 %} 2075 2076 enc_class Con16 (immI src) %{ // Con16(storeImmI) 2077 // Output immediate 2078 $$$emit16$src$$constant; 2079 %} 2080 2081 enc_class Con_d32(immI src) %{ 2082 emit_d32(cbuf,$src$$constant); 2083 %} 2084 2085 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2086 // Output immediate memory reference 2087 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2088 emit_d32(cbuf, 0x00); 2089 %} 2090 2091 enc_class lock_prefix( ) %{ 2092 if( os::is_MP() ) 2093 emit_opcode(cbuf,0xF0); // [Lock] 2094 %} 2095 2096 // Cmp-xchg long value. 2097 // Note: we need to swap rbx, and rcx before and after the 2098 // cmpxchg8 instruction because the instruction uses 2099 // rcx as the high order word of the new value to store but 2100 // our register encoding uses rbx,. 
2101 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ 2102 2103 // XCHG rbx,ecx 2104 emit_opcode(cbuf,0x87); 2105 emit_opcode(cbuf,0xD9); 2106 // [Lock] 2107 if( os::is_MP() ) 2108 emit_opcode(cbuf,0xF0); 2109 // CMPXCHG8 [Eptr] 2110 emit_opcode(cbuf,0x0F); 2111 emit_opcode(cbuf,0xC7); 2112 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2113 // XCHG rbx,ecx 2114 emit_opcode(cbuf,0x87); 2115 emit_opcode(cbuf,0xD9); 2116 %} 2117 2118 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ 2119 // [Lock] 2120 if( os::is_MP() ) 2121 emit_opcode(cbuf,0xF0); 2122 2123 // CMPXCHG [Eptr] 2124 emit_opcode(cbuf,0x0F); 2125 emit_opcode(cbuf,0xB1); 2126 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg ); 2127 %} 2128 2129 enc_class enc_flags_ne_to_boolean( iRegI res ) %{ 2130 int res_encoding = $res$$reg; 2131 2132 // MOV res,0 2133 emit_opcode( cbuf, 0xB8 + res_encoding); 2134 emit_d32( cbuf, 0 ); 2135 // JNE,s fail 2136 emit_opcode(cbuf,0x75); 2137 emit_d8(cbuf, 5 ); 2138 // MOV res,1 2139 emit_opcode( cbuf, 0xB8 + res_encoding); 2140 emit_d32( cbuf, 1 ); 2141 // fail: 2142 %} 2143 2144 enc_class set_instruction_start( ) %{ 2145 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2146 %} 2147 2148 enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem 2149 int reg_encoding = $ereg$$reg; 2150 int base = $mem$$base; 2151 int index = $mem$$index; 2152 int scale = $mem$$scale; 2153 int displace = $mem$$disp; 2154 relocInfo::relocType disp_reloc = $mem->disp_reloc(); 2155 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2156 %} 2157 2158 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem 2159 int reg_encoding = HIGH_FROM_LOW($ereg$$reg); // Hi register of pair, computed from lo 2160 int base = $mem$$base; 2161 int index = $mem$$index; 2162 int scale = $mem$$scale; 2163 int displace = $mem$$disp + 4; // Offset is 4 further in memory 2164 assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" ); 2165 encode_RegMem(cbuf, reg_encoding, 
base, index, scale, displace, relocInfo::none); 2166 %} 2167 2168 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ 2169 int r1, r2; 2170 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 2171 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 2172 emit_opcode(cbuf,0x0F); 2173 emit_opcode(cbuf,$tertiary); 2174 emit_rm(cbuf, 0x3, r1, r2); 2175 emit_d8(cbuf,$cnt$$constant); 2176 emit_d8(cbuf,$primary); 2177 emit_rm(cbuf, 0x3, $secondary, r1); 2178 emit_d8(cbuf,$cnt$$constant); 2179 %} 2180 2181 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ 2182 emit_opcode( cbuf, 0x8B ); // Move 2183 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2184 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2185 emit_d8(cbuf,$primary); 2186 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 2187 emit_d8(cbuf,$cnt$$constant-32); 2188 } 2189 emit_d8(cbuf,$primary); 2190 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg)); 2191 emit_d8(cbuf,31); 2192 %} 2193 2194 enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{ 2195 int r1, r2; 2196 if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); } 2197 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); } 2198 2199 emit_opcode( cbuf, 0x8B ); // Move r1,r2 2200 emit_rm(cbuf, 0x3, r1, r2); 2201 if( $cnt$$constant > 32 ) { // Shift, if not by zero 2202 emit_opcode(cbuf,$primary); 2203 emit_rm(cbuf, 0x3, $secondary, r1); 2204 emit_d8(cbuf,$cnt$$constant-32); 2205 } 2206 emit_opcode(cbuf,0x33); // XOR r2,r2 2207 emit_rm(cbuf, 0x3, r2, r2); 2208 %} 2209 2210 // Clone of RegMem but accepts an extra parameter to access each 2211 // half of a double in memory; it never needs relocation info. 
2212 enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{ 2213 emit_opcode(cbuf,$opcode$$constant); 2214 int reg_encoding = $rm_reg$$reg; 2215 int base = $mem$$base; 2216 int index = $mem$$index; 2217 int scale = $mem$$scale; 2218 int displace = $mem$$disp + $disp_for_half$$constant; 2219 relocInfo::relocType disp_reloc = relocInfo::none; 2220 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2221 %} 2222 2223 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!! 2224 // 2225 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant 2226 // and it never needs relocation information. 2227 // Frequently used to move data between FPU's Stack Top and memory. 2228 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ 2229 int rm_byte_opcode = $rm_opcode$$constant; 2230 int base = $mem$$base; 2231 int index = $mem$$index; 2232 int scale = $mem$$scale; 2233 int displace = $mem$$disp; 2234 assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" ); 2235 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none); 2236 %} 2237 2238 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ 2239 int rm_byte_opcode = $rm_opcode$$constant; 2240 int base = $mem$$base; 2241 int index = $mem$$index; 2242 int scale = $mem$$scale; 2243 int displace = $mem$$disp; 2244 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2245 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 2246 %} 2247 2248 enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea 2249 int reg_encoding = $dst$$reg; 2250 int base = $src0$$reg; // 0xFFFFFFFF indicates no base 2251 int index = 0x04; // 0x04 indicates no index 2252 int scale = 0x00; // 0x00 indicates no scale 2253 int displace = $src1$$constant; // 0x00 indicates no displacement 2254 
relocInfo::relocType disp_reloc = relocInfo::none; 2255 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2256 %} 2257 2258 enc_class min_enc (rRegI dst, rRegI src) %{ // MIN 2259 // Compare dst,src 2260 emit_opcode(cbuf,0x3B); 2261 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2262 // jmp dst < src around move 2263 emit_opcode(cbuf,0x7C); 2264 emit_d8(cbuf,2); 2265 // move dst,src 2266 emit_opcode(cbuf,0x8B); 2267 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2268 %} 2269 2270 enc_class max_enc (rRegI dst, rRegI src) %{ // MAX 2271 // Compare dst,src 2272 emit_opcode(cbuf,0x3B); 2273 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2274 // jmp dst > src around move 2275 emit_opcode(cbuf,0x7F); 2276 emit_d8(cbuf,2); 2277 // move dst,src 2278 emit_opcode(cbuf,0x8B); 2279 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2280 %} 2281 2282 enc_class enc_FPR_store(memory mem, regDPR src) %{ 2283 // If src is FPR1, we can just FST to store it. 2284 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 
2285 int reg_encoding = 0x2; // Just store 2286 int base = $mem$$base; 2287 int index = $mem$$index; 2288 int scale = $mem$$scale; 2289 int displace = $mem$$disp; 2290 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 2291 if( $src$$reg != FPR1L_enc ) { 2292 reg_encoding = 0x3; // Store & pop 2293 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it) 2294 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2295 } 2296 cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand 2297 emit_opcode(cbuf,$primary); 2298 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc); 2299 %} 2300 2301 enc_class neg_reg(rRegI dst) %{ 2302 // NEG $dst 2303 emit_opcode(cbuf,0xF7); 2304 emit_rm(cbuf, 0x3, 0x03, $dst$$reg ); 2305 %} 2306 2307 enc_class setLT_reg(eCXRegI dst) %{ 2308 // SETLT $dst 2309 emit_opcode(cbuf,0x0F); 2310 emit_opcode(cbuf,0x9C); 2311 emit_rm( cbuf, 0x3, 0x4, $dst$$reg ); 2312 %} 2313 2314 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT 2315 int tmpReg = $tmp$$reg; 2316 2317 // SUB $p,$q 2318 emit_opcode(cbuf,0x2B); 2319 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg); 2320 // SBB $tmp,$tmp 2321 emit_opcode(cbuf,0x1B); 2322 emit_rm(cbuf, 0x3, tmpReg, tmpReg); 2323 // AND $tmp,$y 2324 emit_opcode(cbuf,0x23); 2325 emit_rm(cbuf, 0x3, tmpReg, $y$$reg); 2326 // ADD $p,$tmp 2327 emit_opcode(cbuf,0x03); 2328 emit_rm(cbuf, 0x3, $p$$reg, tmpReg); 2329 %} 2330 2331 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ 2332 // TEST shift,32 2333 emit_opcode(cbuf,0xF7); 2334 emit_rm(cbuf, 0x3, 0, ECX_enc); 2335 emit_d32(cbuf,0x20); 2336 // JEQ,s small 2337 emit_opcode(cbuf, 0x74); 2338 emit_d8(cbuf, 0x04); 2339 // MOV $dst.hi,$dst.lo 2340 emit_opcode( cbuf, 0x8B ); 2341 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2342 // CLR $dst.lo 2343 emit_opcode(cbuf, 0x33); 2344 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); 2345 // small: 2346 // SHLD $dst.hi,$dst.lo,$shift 2347 
emit_opcode(cbuf,0x0F); 2348 emit_opcode(cbuf,0xA5); 2349 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2350 // SHL $dst.lo,$shift" 2351 emit_opcode(cbuf,0xD3); 2352 emit_rm(cbuf, 0x3, 0x4, $dst$$reg ); 2353 %} 2354 2355 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ 2356 // TEST shift,32 2357 emit_opcode(cbuf,0xF7); 2358 emit_rm(cbuf, 0x3, 0, ECX_enc); 2359 emit_d32(cbuf,0x20); 2360 // JEQ,s small 2361 emit_opcode(cbuf, 0x74); 2362 emit_d8(cbuf, 0x04); 2363 // MOV $dst.lo,$dst.hi 2364 emit_opcode( cbuf, 0x8B ); 2365 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2366 // CLR $dst.hi 2367 emit_opcode(cbuf, 0x33); 2368 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg)); 2369 // small: 2370 // SHRD $dst.lo,$dst.hi,$shift 2371 emit_opcode(cbuf,0x0F); 2372 emit_opcode(cbuf,0xAD); 2373 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2374 // SHR $dst.hi,$shift" 2375 emit_opcode(cbuf,0xD3); 2376 emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) ); 2377 %} 2378 2379 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ 2380 // TEST shift,32 2381 emit_opcode(cbuf,0xF7); 2382 emit_rm(cbuf, 0x3, 0, ECX_enc); 2383 emit_d32(cbuf,0x20); 2384 // JEQ,s small 2385 emit_opcode(cbuf, 0x74); 2386 emit_d8(cbuf, 0x05); 2387 // MOV $dst.lo,$dst.hi 2388 emit_opcode( cbuf, 0x8B ); 2389 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2390 // SAR $dst.hi,31 2391 emit_opcode(cbuf, 0xC1); 2392 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) ); 2393 emit_d8(cbuf, 0x1F ); 2394 // small: 2395 // SHRD $dst.lo,$dst.hi,$shift 2396 emit_opcode(cbuf,0x0F); 2397 emit_opcode(cbuf,0xAD); 2398 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2399 // SAR $dst.hi,$shift" 2400 emit_opcode(cbuf,0xD3); 2401 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) ); 2402 %} 2403 2404 2405 // ----------------- Encodings for floating point unit ----------------- 2406 // May leave result in FPU-TOS or FPU reg depending on opcodes 2407 
enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV 2408 $$$emit8$primary; 2409 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 2410 %} 2411 2412 // Pop argument in FPR0 with FSTP ST(0) 2413 enc_class PopFPU() %{ 2414 emit_opcode( cbuf, 0xDD ); 2415 emit_d8( cbuf, 0xD8 ); 2416 %} 2417 2418 // !!!!! equivalent to Pop_Reg_F 2419 enc_class Pop_Reg_DPR( regDPR dst ) %{ 2420 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2421 emit_d8( cbuf, 0xD8+$dst$$reg ); 2422 %} 2423 2424 enc_class Push_Reg_DPR( regDPR dst ) %{ 2425 emit_opcode( cbuf, 0xD9 ); 2426 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2427 %} 2428 2429 enc_class strictfp_bias1( regDPR dst ) %{ 2430 emit_opcode( cbuf, 0xDB ); // FLD m80real 2431 emit_opcode( cbuf, 0x2D ); 2432 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); 2433 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2434 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2435 %} 2436 2437 enc_class strictfp_bias2( regDPR dst ) %{ 2438 emit_opcode( cbuf, 0xDB ); // FLD m80real 2439 emit_opcode( cbuf, 0x2D ); 2440 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); 2441 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2442 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2443 %} 2444 2445 // Special case for moving an integer register to a stack slot. 2446 enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2447 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp ); 2448 %} 2449 2450 // Special case for moving a register to a stack slot. 
2451 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2452 // Opcode already emitted 2453 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2454 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2455 emit_d32(cbuf, $dst$$disp); // Displacement 2456 %} 2457 2458 // Push the integer in stackSlot 'src' onto FP-stack 2459 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2460 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2461 %} 2462 2463 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2464 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2465 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2466 %} 2467 2468 // Same as Pop_Mem_F except for opcode 2469 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2470 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2471 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2472 %} 2473 2474 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2475 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2476 emit_d8( cbuf, 0xD8+$dst$$reg ); 2477 %} 2478 2479 enc_class Push_Reg_FPR( regFPR dst ) %{ 2480 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2481 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2482 %} 2483 2484 // Push FPU's float to a stack-slot, and pop FPU-stack 2485 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2486 int pop = 0x02; 2487 if ($src$$reg != FPR1L_enc) { 2488 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2489 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2490 pop = 0x03; 2491 } 2492 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2493 %} 2494 2495 // Push FPU's double to a stack-slot, and pop FPU-stack 2496 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2497 int pop = 0x02; 2498 if ($src$$reg != FPR1L_enc) { 2499 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2500 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2501 pop = 0x03; 2502 } 2503 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2504 %} 2505 2506 // Push FPU's double to 
a FPU-stack-slot, and pop FPU-stack 2507 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2508 int pop = 0xD0 - 1; // -1 since we skip FLD 2509 if ($src$$reg != FPR1L_enc) { 2510 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2511 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2512 pop = 0xD8; 2513 } 2514 emit_opcode( cbuf, 0xDD ); 2515 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2516 %} 2517 2518 2519 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2520 // load dst in FPR0 2521 emit_opcode( cbuf, 0xD9 ); 2522 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2523 if ($src$$reg != FPR1L_enc) { 2524 // fincstp 2525 emit_opcode (cbuf, 0xD9); 2526 emit_opcode (cbuf, 0xF7); 2527 // swap src with FPR1: 2528 // FXCH FPR1 with src 2529 emit_opcode(cbuf, 0xD9); 2530 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2531 // fdecstp 2532 emit_opcode (cbuf, 0xD9); 2533 emit_opcode (cbuf, 0xF6); 2534 } 2535 %} 2536 2537 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2538 MacroAssembler _masm(&cbuf); 2539 __ subptr(rsp, 8); 2540 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2541 __ fld_d(Address(rsp, 0)); 2542 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2543 __ fld_d(Address(rsp, 0)); 2544 %} 2545 2546 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2547 MacroAssembler _masm(&cbuf); 2548 __ subptr(rsp, 4); 2549 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2550 __ fld_s(Address(rsp, 0)); 2551 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2552 __ fld_s(Address(rsp, 0)); 2553 %} 2554 2555 enc_class Push_ResultD(regD dst) %{ 2556 MacroAssembler _masm(&cbuf); 2557 __ fstp_d(Address(rsp, 0)); 2558 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2559 __ addptr(rsp, 8); 2560 %} 2561 2562 enc_class Push_ResultF(regF dst, immI d8) %{ 2563 MacroAssembler _masm(&cbuf); 2564 __ fstp_s(Address(rsp, 0)); 2565 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2566 __ addptr(rsp, $d8$$constant); 2567 %} 2568 2569 enc_class Push_SrcD(regD src) %{ 2570 MacroAssembler _masm(&cbuf); 2571 __ subptr(rsp, 8); 
  // NOTE(review): tail of an enc_class whose header lies above this chunk;
  // presumably it spills an XMM double to the stack scratch slot and reloads
  // it onto the x87 stack -- confirm against the preceding lines.
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Reserve an 8-byte scratch area on the stack (paired with pop_stack_temp_qword).
  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  // Release the 8-byte scratch area reserved by push_stack_temp_qword.
  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Copy a double from an XMM register onto the x87 stack via the stack
  // scratch slot; assumes scratch space is already reserved at [ESP].
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Rotate the x87 stack so the modulus result in ST(src) becomes FPR1,
  // without disturbing the other stack slots (fincstp/fxch/fdecstp trick).
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy the FPU status word into EFLAGS (fnstsw ax; sahf), then skip the
  // next 5 bytes of fixup code when the parity flag is clear (ordered compare).
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  // Iterated FPREM loop: fprem only reduces partially, so repeat until the
  // status word's C2 bit (mapped to PF by sahf) reports completion.
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // Transfer FPU compare results into EFLAGS, forcing the carry flag (LT)
  // when the compare was unordered (C2/parity set).
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32  ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  // fnstsw_ax();
  // sahf();
  // movl(dst, nan_result);
  // jcc(Assembler::parity, exit);
  // movl(dst, less_result);
  // jcc(Assembler::below, exit);
  // movl(dst, equal_result);
  // jcc(Assembler::equal, exit);
  // movl(dst, greater_result);

  // less_result     =  1;
  // greater_result  = -1;
  // equal_result    = 0;
  // nan_result      = -1;

  // Materialize the three-way float-compare result (-1/0/1, NaN => -1) in an
  // integer register from the FPU status flags; jump offsets skip the
  // remaining 5-byte mov+jcc pairs.
  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend a 32-bit int into a long register pair (copy low, copy to
  // high, then arithmetic-shift the high half right by 31).
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  // Push a long register pair and FILD it onto the x87 stack, then pop the
  // 8 temporary stack bytes.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add  SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  // IMUL EDX:EAX by src1, then shift the high half (EDX) right by cnt-32
  // to produce the high bits of the product.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}

  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}

  // Push both long arguments and call SharedRuntime::ldiv; the callee leaves
  // the quotient in EDX:EAX.  Pops the 16 argument bytes afterwards.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // As long_div, but calls SharedRuntime::lrem for the remainder.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Test a long for zero: OR the two halves together into tmp and let the
  // flags reflect the result.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  // Long equality compare: compare low halves, skip high-half compare if
  // already unequal.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}

  // Signed long compare via CMP low / SBB high, clobbering tmp.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB    $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}

  // Compare a long against zero: 0 - src via XOR/CMP/SBB sets the flags.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}

  // Sniff, sniff... smells like Gnu Superoptimizer
  // Two's-complement negate of a long pair: NEG hi; NEG lo; SBB hi,0.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3, $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  // POP EDX (single-byte opcode 0x5A).
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}

  // Jump to the shared rethrow stub.
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);         // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);         // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);         // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);         // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);         // POP EAX
    emit_opcode(cbuf,0x3D);         // CMP EAX,imm
    emit_d32   (cbuf,0x80000000);   //         0x80000000
    emit_opcode(cbuf,0x75);         // JNE around_slow_call
    emit_d8    (cbuf,0x07);         // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );        // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Double -> long, same round-to-zero/store/restore dance as DPR2I_encoding
  // but with an 8-byte slot, a 64-bit FISTP, and a min-long sentinel check.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);         // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);         // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);         // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);         // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);         // POP EAX
    emit_opcode(cbuf,0x5A);         // POP EDX
    emit_opcode(cbuf,0x81);         // CMP EDX,imm
    emit_d8    (cbuf,0xFA);         // rdx
    emit_d32   (cbuf,0x80000000);   //         0x80000000
    emit_opcode(cbuf,0x75);         // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);       // Size of slow_call
    emit_opcode(cbuf,0x85);         // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);         // 2/rax,/rax,
    emit_opcode(cbuf,0x75);         // JNE around_slow_call
    emit_d8    (cbuf,0x07);         // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );        // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADDP  ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable. Unfortunately, it kills the condition code
  // in the process
  // We current use TESTL [spp],EDI
  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0

  enc_class Safepoint_Poll() %{
    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm (cbuf, 0x0, 0x7, 0x5);    // TESTL EDI,[disp32] -- mod=00, rm=101
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  %}
%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   | (to get allocators register number
//  G  Owned by |        |            v    add OptoReg::stack0())
//  r   CALLER  |        |
//  o     |     +--------+      pad to even-align allocators stack-slot
//  w     V     |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^     |   in   |  5
//        |     |  args  |  4   Holes in incoming args owned by SELF
//        |     |        |  3
//        |     +--------+
//        V     | old out|      Empty on Intel, window on Sparc
//        | old |preserve|      Must be even aligned.
//        | SP-+--------+----> Matcher::_old_SP, even aligned
//        |     |   in   |  3   area for Intel ret address
//     Owned by |preserve|      Empty on Sparc.
//       SELF   +--------+
//        |     |  pad2  |  2   pad to align old SP
//        |     +--------+  1
//        |     | locks  |  0
//        |     +--------+----> OptoReg::stack0(), even aligned
//        |     | pad1   | 11   pad to align new SP
//        |     +--------+
//        |     |        | 10
//        |     | spills |  9   spills
//        V     |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^     |  out   |  7
//        |     |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by +--------+
//       CALLEE | new out|  6   Empty on Intel, window on Sparc
//        | new |preserve|      Must be even-aligned.
//        | SP-+--------+----> Matcher::_new_SP, even aligned
//        |     |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    // lo/hi halves of the return register pair, indexed by ideal register type
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // (Java convention; note floats use XMM0 already at UseSSE>=1, unlike the
  // C convention above which requires UseSSE>=2.)
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// 8-bit signed immediate
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// 16-bit signed immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift amount within a 32-bit word
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift amount selecting only the high long word
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: fits in a signed 32-bit value
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 code path)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE code path)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand
eCXRegI(xRegI reg) %{ 3727 constraint(ALLOC_IN_RC(ecx_reg)); 3728 match(reg); 3729 match(rRegI); 3730 3731 format %{ "ECX" %} 3732 interface(REG_INTER); 3733 %} 3734 3735 operand eDXRegI(xRegI reg) %{ 3736 constraint(ALLOC_IN_RC(edx_reg)); 3737 match(reg); 3738 match(rRegI); 3739 3740 format %{ "EDX" %} 3741 interface(REG_INTER); 3742 %} 3743 3744 operand eDIRegI(xRegI reg) %{ 3745 constraint(ALLOC_IN_RC(edi_reg)); 3746 match(reg); 3747 match(rRegI); 3748 3749 format %{ "EDI" %} 3750 interface(REG_INTER); 3751 %} 3752 3753 operand naxRegI() %{ 3754 constraint(ALLOC_IN_RC(nax_reg)); 3755 match(RegI); 3756 match(eCXRegI); 3757 match(eDXRegI); 3758 match(eSIRegI); 3759 match(eDIRegI); 3760 3761 format %{ %} 3762 interface(REG_INTER); 3763 %} 3764 3765 operand nadxRegI() %{ 3766 constraint(ALLOC_IN_RC(nadx_reg)); 3767 match(RegI); 3768 match(eBXRegI); 3769 match(eCXRegI); 3770 match(eSIRegI); 3771 match(eDIRegI); 3772 3773 format %{ %} 3774 interface(REG_INTER); 3775 %} 3776 3777 operand ncxRegI() %{ 3778 constraint(ALLOC_IN_RC(ncx_reg)); 3779 match(RegI); 3780 match(eAXRegI); 3781 match(eDXRegI); 3782 match(eSIRegI); 3783 match(eDIRegI); 3784 3785 format %{ %} 3786 interface(REG_INTER); 3787 %} 3788 3789 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3790 // // 3791 operand eSIRegI(xRegI reg) %{ 3792 constraint(ALLOC_IN_RC(esi_reg)); 3793 match(reg); 3794 match(rRegI); 3795 3796 format %{ "ESI" %} 3797 interface(REG_INTER); 3798 %} 3799 3800 // Pointer Register 3801 operand anyRegP() %{ 3802 constraint(ALLOC_IN_RC(any_reg)); 3803 match(RegP); 3804 match(eAXRegP); 3805 match(eBXRegP); 3806 match(eCXRegP); 3807 match(eDIRegP); 3808 match(eRegP); 3809 3810 format %{ %} 3811 interface(REG_INTER); 3812 %} 3813 3814 operand eRegP() %{ 3815 constraint(ALLOC_IN_RC(int_reg)); 3816 match(RegP); 3817 match(eAXRegP); 3818 match(eBXRegP); 3819 match(eCXRegP); 3820 match(eDIRegP); 3821 3822 format %{ %} 3823 interface(REG_INTER); 3824 %} 3825 3826 // 
On windows95, EBP is not safe to use for implicit null tests. 3827 operand eRegP_no_EBP() %{ 3828 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3829 match(RegP); 3830 match(eAXRegP); 3831 match(eBXRegP); 3832 match(eCXRegP); 3833 match(eDIRegP); 3834 3835 op_cost(100); 3836 format %{ %} 3837 interface(REG_INTER); 3838 %} 3839 3840 operand naxRegP() %{ 3841 constraint(ALLOC_IN_RC(nax_reg)); 3842 match(RegP); 3843 match(eBXRegP); 3844 match(eDXRegP); 3845 match(eCXRegP); 3846 match(eSIRegP); 3847 match(eDIRegP); 3848 3849 format %{ %} 3850 interface(REG_INTER); 3851 %} 3852 3853 operand nabxRegP() %{ 3854 constraint(ALLOC_IN_RC(nabx_reg)); 3855 match(RegP); 3856 match(eCXRegP); 3857 match(eDXRegP); 3858 match(eSIRegP); 3859 match(eDIRegP); 3860 3861 format %{ %} 3862 interface(REG_INTER); 3863 %} 3864 3865 operand pRegP() %{ 3866 constraint(ALLOC_IN_RC(p_reg)); 3867 match(RegP); 3868 match(eBXRegP); 3869 match(eDXRegP); 3870 match(eSIRegP); 3871 match(eDIRegP); 3872 3873 format %{ %} 3874 interface(REG_INTER); 3875 %} 3876 3877 // Special Registers 3878 // Return a pointer value 3879 operand eAXRegP(eRegP reg) %{ 3880 constraint(ALLOC_IN_RC(eax_reg)); 3881 match(reg); 3882 format %{ "EAX" %} 3883 interface(REG_INTER); 3884 %} 3885 3886 // Used in AtomicAdd 3887 operand eBXRegP(eRegP reg) %{ 3888 constraint(ALLOC_IN_RC(ebx_reg)); 3889 match(reg); 3890 format %{ "EBX" %} 3891 interface(REG_INTER); 3892 %} 3893 3894 // Tail-call (interprocedural jump) to interpreter 3895 operand eCXRegP(eRegP reg) %{ 3896 constraint(ALLOC_IN_RC(ecx_reg)); 3897 match(reg); 3898 format %{ "ECX" %} 3899 interface(REG_INTER); 3900 %} 3901 3902 operand eSIRegP(eRegP reg) %{ 3903 constraint(ALLOC_IN_RC(esi_reg)); 3904 match(reg); 3905 format %{ "ESI" %} 3906 interface(REG_INTER); 3907 %} 3908 3909 // Used in rep stosw 3910 operand eDIRegP(eRegP reg) %{ 3911 constraint(ALLOC_IN_RC(edi_reg)); 3912 match(reg); 3913 format %{ "EDI" %} 3914 interface(REG_INTER); 3915 %} 3916 3917 operand eRegL() %{ 
3918 constraint(ALLOC_IN_RC(long_reg)); 3919 match(RegL); 3920 match(eADXRegL); 3921 3922 format %{ %} 3923 interface(REG_INTER); 3924 %} 3925 3926 operand eADXRegL( eRegL reg ) %{ 3927 constraint(ALLOC_IN_RC(eadx_reg)); 3928 match(reg); 3929 3930 format %{ "EDX:EAX" %} 3931 interface(REG_INTER); 3932 %} 3933 3934 operand eBCXRegL( eRegL reg ) %{ 3935 constraint(ALLOC_IN_RC(ebcx_reg)); 3936 match(reg); 3937 3938 format %{ "EBX:ECX" %} 3939 interface(REG_INTER); 3940 %} 3941 3942 // Special case for integer high multiply 3943 operand eADXRegL_low_only() %{ 3944 constraint(ALLOC_IN_RC(eadx_reg)); 3945 match(RegL); 3946 3947 format %{ "EAX" %} 3948 interface(REG_INTER); 3949 %} 3950 3951 // Flags register, used as output of compare instructions 3952 operand eFlagsReg() %{ 3953 constraint(ALLOC_IN_RC(int_flags)); 3954 match(RegFlags); 3955 3956 format %{ "EFLAGS" %} 3957 interface(REG_INTER); 3958 %} 3959 3960 // Flags register, used as output of FLOATING POINT compare instructions 3961 operand eFlagsRegU() %{ 3962 constraint(ALLOC_IN_RC(int_flags)); 3963 match(RegFlags); 3964 3965 format %{ "EFLAGS_U" %} 3966 interface(REG_INTER); 3967 %} 3968 3969 operand eFlagsRegUCF() %{ 3970 constraint(ALLOC_IN_RC(int_flags)); 3971 match(RegFlags); 3972 predicate(false); 3973 3974 format %{ "EFLAGS_U_CF" %} 3975 interface(REG_INTER); 3976 %} 3977 3978 // Condition Code Register used by long compare 3979 operand flagsReg_long_LTGE() %{ 3980 constraint(ALLOC_IN_RC(int_flags)); 3981 match(RegFlags); 3982 format %{ "FLAGS_LTGE" %} 3983 interface(REG_INTER); 3984 %} 3985 operand flagsReg_long_EQNE() %{ 3986 constraint(ALLOC_IN_RC(int_flags)); 3987 match(RegFlags); 3988 format %{ "FLAGS_EQNE" %} 3989 interface(REG_INTER); 3990 %} 3991 operand flagsReg_long_LEGT() %{ 3992 constraint(ALLOC_IN_RC(int_flags)); 3993 match(RegFlags); 3994 format %{ "FLAGS_LEGT" %} 3995 interface(REG_INTER); 3996 %} 3997 3998 // Float register operands 3999 operand regDPR() %{ 4000 predicate( UseSSE < 2 ); 
4001 constraint(ALLOC_IN_RC(fp_dbl_reg)); 4002 match(RegD); 4003 match(regDPR1); 4004 match(regDPR2); 4005 format %{ %} 4006 interface(REG_INTER); 4007 %} 4008 4009 operand regDPR1(regDPR reg) %{ 4010 predicate( UseSSE < 2 ); 4011 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4012 match(reg); 4013 format %{ "FPR1" %} 4014 interface(REG_INTER); 4015 %} 4016 4017 operand regDPR2(regDPR reg) %{ 4018 predicate( UseSSE < 2 ); 4019 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4020 match(reg); 4021 format %{ "FPR2" %} 4022 interface(REG_INTER); 4023 %} 4024 4025 operand regnotDPR1(regDPR reg) %{ 4026 predicate( UseSSE < 2 ); 4027 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4028 match(reg); 4029 format %{ %} 4030 interface(REG_INTER); 4031 %} 4032 4033 // Float register operands 4034 operand regFPR() %{ 4035 predicate( UseSSE < 2 ); 4036 constraint(ALLOC_IN_RC(fp_flt_reg)); 4037 match(RegF); 4038 match(regFPR1); 4039 format %{ %} 4040 interface(REG_INTER); 4041 %} 4042 4043 // Float register operands 4044 operand regFPR1(regFPR reg) %{ 4045 predicate( UseSSE < 2 ); 4046 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4047 match(reg); 4048 format %{ "FPR1" %} 4049 interface(REG_INTER); 4050 %} 4051 4052 // XMM Float register operands 4053 operand regF() %{ 4054 predicate( UseSSE>=1 ); 4055 constraint(ALLOC_IN_RC(float_reg_legacy)); 4056 match(RegF); 4057 format %{ %} 4058 interface(REG_INTER); 4059 %} 4060 4061 // XMM Double register operands 4062 operand regD() %{ 4063 predicate( UseSSE>=2 ); 4064 constraint(ALLOC_IN_RC(double_reg_legacy)); 4065 match(RegD); 4066 format %{ %} 4067 interface(REG_INTER); 4068 %} 4069 4070 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM) 4071 // runtime code generation via reg_class_dynamic. 
4072 operand vecS() %{ 4073 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 4074 match(VecS); 4075 4076 format %{ %} 4077 interface(REG_INTER); 4078 %} 4079 4080 operand vecD() %{ 4081 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 4082 match(VecD); 4083 4084 format %{ %} 4085 interface(REG_INTER); 4086 %} 4087 4088 operand vecX() %{ 4089 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 4090 match(VecX); 4091 4092 format %{ %} 4093 interface(REG_INTER); 4094 %} 4095 4096 operand vecY() %{ 4097 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 4098 match(VecY); 4099 4100 format %{ %} 4101 interface(REG_INTER); 4102 %} 4103 4104 //----------Memory Operands---------------------------------------------------- 4105 // Direct Memory Operand 4106 operand direct(immP addr) %{ 4107 match(addr); 4108 4109 format %{ "[$addr]" %} 4110 interface(MEMORY_INTER) %{ 4111 base(0xFFFFFFFF); 4112 index(0x4); 4113 scale(0x0); 4114 disp($addr); 4115 %} 4116 %} 4117 4118 // Indirect Memory Operand 4119 operand indirect(eRegP reg) %{ 4120 constraint(ALLOC_IN_RC(int_reg)); 4121 match(reg); 4122 4123 format %{ "[$reg]" %} 4124 interface(MEMORY_INTER) %{ 4125 base($reg); 4126 index(0x4); 4127 scale(0x0); 4128 disp(0x0); 4129 %} 4130 %} 4131 4132 // Indirect Memory Plus Short Offset Operand 4133 operand indOffset8(eRegP reg, immI8 off) %{ 4134 match(AddP reg off); 4135 4136 format %{ "[$reg + $off]" %} 4137 interface(MEMORY_INTER) %{ 4138 base($reg); 4139 index(0x4); 4140 scale(0x0); 4141 disp($off); 4142 %} 4143 %} 4144 4145 // Indirect Memory Plus Long Offset Operand 4146 operand indOffset32(eRegP reg, immI off) %{ 4147 match(AddP reg off); 4148 4149 format %{ "[$reg + $off]" %} 4150 interface(MEMORY_INTER) %{ 4151 base($reg); 4152 index(0x4); 4153 scale(0x0); 4154 disp($off); 4155 %} 4156 %} 4157 4158 // Indirect Memory Plus Long Offset Operand 4159 operand indOffset32X(rRegI reg, immP off) %{ 4160 match(AddP off reg); 4161 4162 format %{ "[$reg + $off]" %} 4163 interface(MEMORY_INTER) %{ 4164 
base($reg); 4165 index(0x4); 4166 scale(0x0); 4167 disp($off); 4168 %} 4169 %} 4170 4171 // Indirect Memory Plus Index Register Plus Offset Operand 4172 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4173 match(AddP (AddP reg ireg) off); 4174 4175 op_cost(10); 4176 format %{"[$reg + $off + $ireg]" %} 4177 interface(MEMORY_INTER) %{ 4178 base($reg); 4179 index($ireg); 4180 scale(0x0); 4181 disp($off); 4182 %} 4183 %} 4184 4185 // Indirect Memory Plus Index Register Plus Offset Operand 4186 operand indIndex(eRegP reg, rRegI ireg) %{ 4187 match(AddP reg ireg); 4188 4189 op_cost(10); 4190 format %{"[$reg + $ireg]" %} 4191 interface(MEMORY_INTER) %{ 4192 base($reg); 4193 index($ireg); 4194 scale(0x0); 4195 disp(0x0); 4196 %} 4197 %} 4198 4199 // // ------------------------------------------------------------------------- 4200 // // 486 architecture doesn't support "scale * index + offset" with out a base 4201 // // ------------------------------------------------------------------------- 4202 // // Scaled Memory Operands 4203 // // Indirect Memory Times Scale Plus Offset Operand 4204 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4205 // match(AddP off (LShiftI ireg scale)); 4206 // 4207 // op_cost(10); 4208 // format %{"[$off + $ireg << $scale]" %} 4209 // interface(MEMORY_INTER) %{ 4210 // base(0x4); 4211 // index($ireg); 4212 // scale($scale); 4213 // disp($off); 4214 // %} 4215 // %} 4216 4217 // Indirect Memory Times Scale Plus Index Register 4218 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4219 match(AddP reg (LShiftI ireg scale)); 4220 4221 op_cost(10); 4222 format %{"[$reg + $ireg << $scale]" %} 4223 interface(MEMORY_INTER) %{ 4224 base($reg); 4225 index($ireg); 4226 scale($scale); 4227 disp(0x0); 4228 %} 4229 %} 4230 4231 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4232 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4233 match(AddP (AddP reg (LShiftI ireg 
scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support: pointer pinned to ESI so it cannot alias the
// EDX:EAX-style long destination pairs
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);   // no index
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);   // no index
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
4296 operand stackSlotP(sRegP reg) %{ 4297 constraint(ALLOC_IN_RC(stack_slots)); 4298 // No match rule because this operand is only generated in matching 4299 format %{ "[$reg]" %} 4300 interface(MEMORY_INTER) %{ 4301 base(0x4); // ESP 4302 index(0x4); // No Index 4303 scale(0x0); // No Scale 4304 disp($reg); // Stack Offset 4305 %} 4306 %} 4307 4308 operand stackSlotI(sRegI reg) %{ 4309 constraint(ALLOC_IN_RC(stack_slots)); 4310 // No match rule because this operand is only generated in matching 4311 format %{ "[$reg]" %} 4312 interface(MEMORY_INTER) %{ 4313 base(0x4); // ESP 4314 index(0x4); // No Index 4315 scale(0x0); // No Scale 4316 disp($reg); // Stack Offset 4317 %} 4318 %} 4319 4320 operand stackSlotF(sRegF reg) %{ 4321 constraint(ALLOC_IN_RC(stack_slots)); 4322 // No match rule because this operand is only generated in matching 4323 format %{ "[$reg]" %} 4324 interface(MEMORY_INTER) %{ 4325 base(0x4); // ESP 4326 index(0x4); // No Index 4327 scale(0x0); // No Scale 4328 disp($reg); // Stack Offset 4329 %} 4330 %} 4331 4332 operand stackSlotD(sRegD reg) %{ 4333 constraint(ALLOC_IN_RC(stack_slots)); 4334 // No match rule because this operand is only generated in matching 4335 format %{ "[$reg]" %} 4336 interface(MEMORY_INTER) %{ 4337 base(0x4); // ESP 4338 index(0x4); // No Index 4339 scale(0x0); // No Scale 4340 disp($reg); // Stack Offset 4341 %} 4342 %} 4343 4344 operand stackSlotL(sRegL reg) %{ 4345 constraint(ALLOC_IN_RC(stack_slots)); 4346 // No match rule because this operand is only generated in matching 4347 format %{ "[$reg]" %} 4348 interface(MEMORY_INTER) %{ 4349 base(0x4); // ESP 4350 index(0x4); // No Index 4351 scale(0x0); // No Scale 4352 disp($reg); // Stack Offset 4353 %} 4354 %} 4355 4356 //----------Memory Operands - Win95 Implicit Null Variants---------------- 4357 // Indirect Memory Operand 4358 operand indirect_win95_safe(eRegP_no_EBP reg) 4359 %{ 4360 constraint(ALLOC_IN_RC(int_reg)); 4361 match(reg); 4362 4363 op_cost(100); 4364 
format %{ "[$reg]" %} 4365 interface(MEMORY_INTER) %{ 4366 base($reg); 4367 index(0x4); 4368 scale(0x0); 4369 disp(0x0); 4370 %} 4371 %} 4372 4373 // Indirect Memory Plus Short Offset Operand 4374 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off) 4375 %{ 4376 match(AddP reg off); 4377 4378 op_cost(100); 4379 format %{ "[$reg + $off]" %} 4380 interface(MEMORY_INTER) %{ 4381 base($reg); 4382 index(0x4); 4383 scale(0x0); 4384 disp($off); 4385 %} 4386 %} 4387 4388 // Indirect Memory Plus Long Offset Operand 4389 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off) 4390 %{ 4391 match(AddP reg off); 4392 4393 op_cost(100); 4394 format %{ "[$reg + $off]" %} 4395 interface(MEMORY_INTER) %{ 4396 base($reg); 4397 index(0x4); 4398 scale(0x0); 4399 disp($off); 4400 %} 4401 %} 4402 4403 // Indirect Memory Plus Index Register Plus Offset Operand 4404 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off) 4405 %{ 4406 match(AddP (AddP reg ireg) off); 4407 4408 op_cost(100); 4409 format %{"[$reg + $off + $ireg]" %} 4410 interface(MEMORY_INTER) %{ 4411 base($reg); 4412 index($ireg); 4413 scale(0x0); 4414 disp($off); 4415 %} 4416 %} 4417 4418 // Indirect Memory Times Scale Plus Index Register 4419 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale) 4420 %{ 4421 match(AddP reg (LShiftI ireg scale)); 4422 4423 op_cost(100); 4424 format %{"[$reg + $ireg << $scale]" %} 4425 interface(MEMORY_INTER) %{ 4426 base($reg); 4427 index($ireg); 4428 scale($scale); 4429 disp(0x0); 4430 %} 4431 %} 4432 4433 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4434 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale) 4435 %{ 4436 match(AddP (AddP reg (LShiftI ireg scale)) off); 4437 4438 op_cost(100); 4439 format %{"[$reg + $off + $ireg << $scale]" %} 4440 interface(MEMORY_INTER) %{ 4441 base($reg); 4442 index($ireg); 4443 scale($scale); 4444 disp($off); 4445 %} 4446 %} 4447 4448 
//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed compares); the hex values are the condition-code
// nibbles encoded into the Jcc/SETcc/CMOVcc opcodes
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
4482 operand cmpOpU() %{ 4483 match(Bool); 4484 4485 format %{ "" %} 4486 interface(COND_INTER) %{ 4487 equal(0x4, "e"); 4488 not_equal(0x5, "ne"); 4489 less(0x2, "b"); 4490 greater_equal(0x3, "nb"); 4491 less_equal(0x6, "be"); 4492 greater(0x7, "nbe"); 4493 overflow(0x0, "o"); 4494 no_overflow(0x1, "no"); 4495 %} 4496 %} 4497 4498 // Floating comparisons that don't require any fixup for the unordered case 4499 operand cmpOpUCF() %{ 4500 match(Bool); 4501 predicate(n->as_Bool()->_test._test == BoolTest::lt || 4502 n->as_Bool()->_test._test == BoolTest::ge || 4503 n->as_Bool()->_test._test == BoolTest::le || 4504 n->as_Bool()->_test._test == BoolTest::gt); 4505 format %{ "" %} 4506 interface(COND_INTER) %{ 4507 equal(0x4, "e"); 4508 not_equal(0x5, "ne"); 4509 less(0x2, "b"); 4510 greater_equal(0x3, "nb"); 4511 less_equal(0x6, "be"); 4512 greater(0x7, "nbe"); 4513 overflow(0x0, "o"); 4514 no_overflow(0x1, "no"); 4515 %} 4516 %} 4517 4518 4519 // Floating comparisons that can be fixed up with extra conditional jumps 4520 operand cmpOpUCF2() %{ 4521 match(Bool); 4522 predicate(n->as_Bool()->_test._test == BoolTest::ne || 4523 n->as_Bool()->_test._test == BoolTest::eq); 4524 format %{ "" %} 4525 interface(COND_INTER) %{ 4526 equal(0x4, "e"); 4527 not_equal(0x5, "ne"); 4528 less(0x2, "b"); 4529 greater_equal(0x3, "nb"); 4530 less_equal(0x6, "be"); 4531 greater(0x7, "nbe"); 4532 overflow(0x0, "o"); 4533 no_overflow(0x1, "no"); 4534 %} 4535 %} 4536 4537 // Comparison Code for FP conditional move 4538 operand cmpOp_fcmov() %{ 4539 match(Bool); 4540 4541 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 4542 n->as_Bool()->_test._test != BoolTest::no_overflow); 4543 format %{ "" %} 4544 interface(COND_INTER) %{ 4545 equal (0x0C8); 4546 not_equal (0x1C8); 4547 less (0x0C0); 4548 greater_equal(0x1C0); 4549 less_equal (0x0D0); 4550 greater (0x1D0); 4551 overflow(0x0, "o"); // not really supported by the instruction 4552 no_overflow(0x1, "no"); // not really supported 
// by the instruction
  %}
%}

// Comparison Code used in long compares; conditions are the commuted
// (swapped-operand) forms of the signed cmpOp codes
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
// Note: deliberately omits indOffset32X, whose offset is an oop (immP).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable-sized (x86)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
4635 4636 // Integer ALU reg operation 4637 pipe_class ialu_reg(rRegI dst) %{ 4638 single_instruction; 4639 dst : S4(write); 4640 dst : S3(read); 4641 DECODE : S0; // any decoder 4642 ALU : S3; // any alu 4643 %} 4644 4645 // Long ALU reg operation 4646 pipe_class ialu_reg_long(eRegL dst) %{ 4647 instruction_count(2); 4648 dst : S4(write); 4649 dst : S3(read); 4650 DECODE : S0(2); // any 2 decoders 4651 ALU : S3(2); // both alus 4652 %} 4653 4654 // Integer ALU reg operation using big decoder 4655 pipe_class ialu_reg_fat(rRegI dst) %{ 4656 single_instruction; 4657 dst : S4(write); 4658 dst : S3(read); 4659 D0 : S0; // big decoder only 4660 ALU : S3; // any alu 4661 %} 4662 4663 // Long ALU reg operation using big decoder 4664 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4665 instruction_count(2); 4666 dst : S4(write); 4667 dst : S3(read); 4668 D0 : S0(2); // big decoder only; twice 4669 ALU : S3(2); // any 2 alus 4670 %} 4671 4672 // Integer ALU reg-reg operation 4673 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4674 single_instruction; 4675 dst : S4(write); 4676 src : S3(read); 4677 DECODE : S0; // any decoder 4678 ALU : S3; // any alu 4679 %} 4680 4681 // Long ALU reg-reg operation 4682 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4683 instruction_count(2); 4684 dst : S4(write); 4685 src : S3(read); 4686 DECODE : S0(2); // any 2 decoders 4687 ALU : S3(2); // both alus 4688 %} 4689 4690 // Integer ALU reg-reg operation 4691 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4692 single_instruction; 4693 dst : S4(write); 4694 src : S3(read); 4695 D0 : S0; // big decoder only 4696 ALU : S3; // any alu 4697 %} 4698 4699 // Long ALU reg-reg operation 4700 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4701 instruction_count(2); 4702 dst : S4(write); 4703 src : S3(read); 4704 D0 : S0(2); // big decoder only; twice 4705 ALU : S3(2); // both alus 4706 %} 4707 4708 // Integer ALU reg-mem operation 4709 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4710 single_instruction; 4711 dst : S5(write); 4712 mem : S3(read); 4713 D0 : S0; // big decoder only 4714 ALU : S4; // any alu 4715 MEM : S3; // any mem 4716 %} 4717 4718 // Long ALU reg-mem operation 4719 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4720 instruction_count(2); 4721 dst : S5(write); 4722 mem : S3(read); 4723 D0 : S0(2); // big decoder only; twice 4724 ALU : S4(2); // any 2 alus 4725 MEM : S3(2); // both mems 4726 %} 4727 4728 // Integer mem operation (prefetch) 4729 pipe_class ialu_mem(memory mem) 4730 %{ 4731 single_instruction; 4732 mem : S3(read); 4733 D0 : S0; // big decoder only 4734 MEM : S3; // any mem 4735 %} 4736 4737 // Integer Store to Memory 4738 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4739 single_instruction; 4740 mem : S3(read); 4741 src : S5(read); 4742 D0 : S0; // big decoder only 4743 ALU : S4; // any alu 4744 MEM : S3; 4745 %} 4746 4747 // Long Store to Memory 4748 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4749 instruction_count(2); 4750 mem : S3(read); 4751 src : S5(read); 4752 D0 : S0(2); // big decoder only; twice 4753 ALU : S4(2); // any 2 alus 4754 MEM : S3(2); // Both mems 4755 %} 4756 4757 // Integer Store to Memory 4758 pipe_class ialu_mem_imm(memory mem) %{ 4759 single_instruction; 4760 mem : S3(read); 4761 D0 : S0; // big decoder only 4762 ALU : S4; // any alu 4763 MEM : S3; 4764 %} 4765 4766 // Integer ALU0 reg-reg operation 4767 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4768 single_instruction; 4769 dst : S4(write); 4770 src : S3(read); 4771 D0 : S0; // Big decoder only 4772 ALU0 : S3; // only alu0 4773 %} 4774 4775 // Integer ALU0 reg-mem operation 4776 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4777 single_instruction; 4778 dst : S5(write); 4779 mem : S3(read); 4780 D0 : S0; // big decoder only 4781 ALU0 : S4; // ALU0 only 4782 MEM : S3; // any mem 4783 %} 4784 4785 // Integer ALU reg-reg operation 4786 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Pipeline classes: each entry names an operand (with the stage, S0..S5, in
// which it is read or written) or a functional unit (decoder, ALU, FPU, MEM)
// with the stage in which it is occupied.  These feed the scheduler only;
// they do not affect the emitted code.

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any 4 decoders
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg operation (single visible operand)
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Unconditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR     : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr     : S1(read);
    BR     : S3;
%}

// Allocation idiom (compare-and-exchange on the heap top pointer)
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.
//               The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Long byte-reverse: swap bytes within each half, then exchange the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Unsigned short byte-reverse: BSWAP puts the 16 interesting bits at the
// top, then a logical shift right brings them down zero-extended.
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Signed short byte-reverse: same as above but the arithmetic shift
// sign-extends the reversed 16-bit value.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when LZCNT is unavailable.  BSR yields the index of the highest
// set bit, so clz = 31 - bsr, computed as NEG + ADD 31.  BSR leaves ZF set
// for a zero input; the -1 fixup then makes the final result 32.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    // LZCNT sets CF iff its source is zero, i.e. the high word had no set
    // bit -- in that case count the low word and add 32.
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when LZCNT is unavailable; same NEG/ADD trick as the int case,
// extended to 64 bits (result 64 for a zero input).
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when TZCNT is unavailable.  BSF already returns the trailing
// zero count for a non-zero input; only the zero case (result 32) needs a
// fixup, detected via ZF.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    // TZCNT sets CF iff its source is zero; a zero low word means the
    // answer lies in the high word, offset by 32.
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when TZCNT is unavailable (result 64 for a zero input).
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Long popcount: sum the POPCNTs of the two 32-bit halves.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Build the two word addresses by hand: low word at disp, high at disp+4.
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 bits of the mask can matter after a zero-extending
    // byte load, so the mask is clipped to 8 bits.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    // Only the low byte survives the 0xFF mask, so a zero-extending byte
    // load does the load and the AND in one instruction.
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Mask clipped to 16 bits: higher bits are already zero after MOVZX.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
// Two separate 32-bit loads -- NOT atomic; the predicate excludes accesses
// that require atomicity (those use the volatile variants below).
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
5752 instruct loadL_volatile(stackSlotL dst, memory mem) %{ 5753 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 5754 match(Set dst (LoadL mem)); 5755 5756 ins_cost(200); 5757 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 5758 "FISTp $dst" %} 5759 ins_encode(enc_loadL_volatile(mem,dst)); 5760 ins_pipe( fpu_reg_mem ); 5761 %} 5762 5763 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 5764 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5765 match(Set dst (LoadL mem)); 5766 effect(TEMP tmp); 5767 ins_cost(180); 5768 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5769 "MOVSD $dst,$tmp" %} 5770 ins_encode %{ 5771 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5772 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 5773 %} 5774 ins_pipe( pipe_slow ); 5775 %} 5776 5777 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 5778 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5779 match(Set dst (LoadL mem)); 5780 effect(TEMP tmp); 5781 ins_cost(160); 5782 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5783 "MOVD $dst.lo,$tmp\n\t" 5784 "PSRLQ $tmp,32\n\t" 5785 "MOVD $dst.hi,$tmp" %} 5786 ins_encode %{ 5787 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5788 __ movdl($dst$$Register, $tmp$$XMMRegister); 5789 __ psrlq($tmp$$XMMRegister, 32); 5790 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 5791 %} 5792 ins_pipe( pipe_slow ); 5793 %} 5794 5795 // Load Range 5796 instruct loadRange(rRegI dst, memory mem) %{ 5797 match(Set dst (LoadRange mem)); 5798 5799 ins_cost(125); 5800 format %{ "MOV $dst,$mem" %} 5801 opcode(0x8B); 5802 ins_encode( OpcP, RegMem(dst,mem)); 5803 ins_pipe( ialu_reg_mem ); 5804 %} 5805 5806 5807 // Load Pointer 5808 instruct loadP(eRegP dst, memory mem) %{ 5809 match(Set dst (LoadP mem)); 5810 5811 ins_cost(125); 5812 format %{ "MOV $dst,$mem" %} 5813 opcode(0x8B); 5814 ins_encode( OpcP, RegMem(dst,mem)); 5815 ins_pipe( 
ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Variant used when UseXmmLoadAndClearUpper is off (see predicate).
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src"
%}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write add with a memory destination (01 /r, reg -> mem form).
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Increment a memory word; FF /0 form, so the immI1 operand is implied
// by the opcode extension and never emitted.
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Decrement a memory word (add of -1); FF /1 form, immediate implied.
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// Zero-size placeholder: CheckCastPP emits no machine code, it only gives
// the ideal node a machine mapping.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastPP likewise emits nothing.
instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastII likewise emits nothing (cost 0).
instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}


// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  // 0F B1 /r is CMPXCHG; lock_prefix makes the update atomic.
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    // Lock prefix only needed on multiprocessor systems.
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS: requires CMPXCHG8B support (supports_cx8). $res is set to
// 1 on success, 0 on failure, derived from ZF by enc_flags_ne_to_boolean.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS; same success/failure protocol as compareAndSwapL.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int CAS; same success/failure protocol as compareAndSwapL.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange variants: unlike CompareAndSwap, the value found in
// memory comes back in $oldval (the Set target) rather than a boolean.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}
7302 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7303 predicate(n->as_LoadStore()->result_not_used()); 7304 match(Set dummy (GetAndAddI mem add)); 7305 effect(KILL cr); 7306 format %{ "ADDL [$mem],$add" %} 7307 ins_encode %{ 7308 if (os::is_MP()) { __ lock(); } 7309 __ addl($mem$$Address, $add$$constant); 7310 %} 7311 ins_pipe( pipe_cmpxchg ); 7312 %} 7313 7314 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7315 match(Set newval (GetAndAddI mem newval)); 7316 effect(KILL cr); 7317 format %{ "XADDL [$mem],$newval" %} 7318 ins_encode %{ 7319 if (os::is_MP()) { __ lock(); } 7320 __ xaddl($mem$$Address, $newval$$Register); 7321 %} 7322 ins_pipe( pipe_cmpxchg ); 7323 %} 7324 7325 instruct xchgI( memory mem, rRegI newval) %{ 7326 match(Set newval (GetAndSetI mem newval)); 7327 format %{ "XCHGL $newval,[$mem]" %} 7328 ins_encode %{ 7329 __ xchgl($newval$$Register, $mem$$Address); 7330 %} 7331 ins_pipe( pipe_cmpxchg ); 7332 %} 7333 7334 instruct xchgP( memory mem, pRegP newval) %{ 7335 match(Set newval (GetAndSetP mem newval)); 7336 format %{ "XCHGL $newval,[$mem]" %} 7337 ins_encode %{ 7338 __ xchgl($newval$$Register, $mem$$Address); 7339 %} 7340 ins_pipe( pipe_cmpxchg ); 7341 %} 7342 7343 //----------Subtraction Instructions------------------------------------------- 7344 7345 // Integer Subtraction Instructions 7346 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7347 match(Set dst (SubI dst src)); 7348 effect(KILL cr); 7349 7350 size(2); 7351 format %{ "SUB $dst,$src" %} 7352 opcode(0x2B); 7353 ins_encode( OpcP, RegReg( dst, src) ); 7354 ins_pipe( ialu_reg_reg ); 7355 %} 7356 7357 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7358 match(Set dst (SubI dst src)); 7359 effect(KILL cr); 7360 7361 format %{ "SUB $dst,$src" %} 7362 opcode(0x81,0x05); /* Opcode 81 /5 */ 7363 // ins_encode( RegImm( dst, src) ); 7364 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7365 ins_pipe( ialu_reg ); 7366 %} 7367 
// Subtract with a memory source operand.
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write subtract with a memory destination (29 /r form).
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negation: 0 - dst, matched as SubI with a zero left operand (F7 /3 = NEG).
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low half (EAX) of the EDX:EAX long pair.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
// The predicate walks the match tree (_kids) to require that the multiplier
// is a ConL constant whose value fits in a signed 32-bit range.
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (general case: shift amount in 32..63, so an extra SAR is needed).
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned 32x32->64 multiply: operands are ints zero-extended via AndL mask.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst,
eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    // HIGH_FROM_LOW selects the high-word register of the long pair.
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
// Single MUL: both high words are known zero, so one 32x32->64 multiply is exact.
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// Guards the min_jint / -1 case up front (see the CMP/JE pair in the
// format): that divide is skipped entirely, since IDIV would fault on it.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// Long division is punted to the SharedRuntime::ldiv runtime stub.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr,
                    eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// Long remainder is punted to the SharedRuntime::lrem runtime stub.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2,
                          eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    // Divide EDX:EAX by the absolute value of the 32-bit constant using
    // unsigned 32-bit DIVs, then fix the sign at the end if imm < 0.
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    // Same structure as divL_eReg_imm32, but the remainder (left in EDX
    // by DIV) is kept instead of the quotient; sign is recovered with SAR.
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable (shift count in CL)
instruct
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7871 match(Set dst (LShiftI dst shift)); 7872 effect(KILL cr); 7873 7874 size(2); 7875 format %{ "SHL $dst,$shift" %} 7876 opcode(0xD3, 0x4); /* D3 /4 */ 7877 ins_encode( OpcP, RegOpc( dst ) ); 7878 ins_pipe( ialu_reg_reg ); 7879 %} 7880 7881 // Arithmetic shift right by one 7882 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7883 match(Set dst (RShiftI dst shift)); 7884 effect(KILL cr); 7885 7886 size(2); 7887 format %{ "SAR $dst,$shift" %} 7888 opcode(0xD1, 0x7); /* D1 /7 */ 7889 ins_encode( OpcP, RegOpc( dst ) ); 7890 ins_pipe( ialu_reg ); 7891 %} 7892 7893 // Arithmetic shift right by one 7894 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{ 7895 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7896 effect(KILL cr); 7897 format %{ "SAR $dst,$shift" %} 7898 opcode(0xD1, 0x7); /* D1 /7 */ 7899 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 7900 ins_pipe( ialu_mem_imm ); 7901 %} 7902 7903 // Arithmetic Shift Right by 8-bit immediate 7904 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7905 match(Set dst (RShiftI dst shift)); 7906 effect(KILL cr); 7907 7908 size(3); 7909 format %{ "SAR $dst,$shift" %} 7910 opcode(0xC1, 0x7); /* C1 /7 ib */ 7911 ins_encode( RegOpcImm( dst, shift ) ); 7912 ins_pipe( ialu_mem_imm ); 7913 %} 7914 7915 // Arithmetic Shift Right by 8-bit immediate 7916 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 7917 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7918 effect(KILL cr); 7919 7920 format %{ "SAR $dst,$shift" %} 7921 opcode(0xC1, 0x7); /* C1 /7 ib */ 7922 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 7923 ins_pipe( ialu_mem_imm ); 7924 %} 7925 7926 // Arithmetic Shift Right by variable 7927 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7928 match(Set dst (RShiftI dst shift)); 7929 effect(KILL cr); 7930 7931 size(2); 7932 format %{ "SAR $dst,$shift" %} 7933 
opcode(0xD3, 0x7); /* D3 /7 */ 7934 ins_encode( OpcP, RegOpc( dst ) ); 7935 ins_pipe( ialu_reg_reg ); 7936 %} 7937 7938 // Logical shift right by one 7939 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7940 match(Set dst (URShiftI dst shift)); 7941 effect(KILL cr); 7942 7943 size(2); 7944 format %{ "SHR $dst,$shift" %} 7945 opcode(0xD1, 0x5); /* D1 /5 */ 7946 ins_encode( OpcP, RegOpc( dst ) ); 7947 ins_pipe( ialu_reg ); 7948 %} 7949 7950 // Logical Shift Right by 8-bit immediate 7951 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7952 match(Set dst (URShiftI dst shift)); 7953 effect(KILL cr); 7954 7955 size(3); 7956 format %{ "SHR $dst,$shift" %} 7957 opcode(0xC1, 0x5); /* C1 /5 ib */ 7958 ins_encode( RegOpcImm( dst, shift) ); 7959 ins_pipe( ialu_reg ); 7960 %} 7961 7962 7963 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 7964 // This idiom is used by the compiler for the i2b bytecode. 7965 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 7966 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 7967 7968 size(3); 7969 format %{ "MOVSX $dst,$src :8" %} 7970 ins_encode %{ 7971 __ movsbl($dst$$Register, $src$$Register); 7972 %} 7973 ins_pipe(ialu_reg_reg); 7974 %} 7975 7976 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 7977 // This idiom is used by the compiler the i2s bytecode. 
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = (~src1) & src2, matched from (src1 ^ -1) & src2
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: dst = (0 - src) & src
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: dst = (src + (-1)) ^ src
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: dst = (src + (-1)) & src
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /* Opcode C1 /0 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  // only a rotate when the two shift counts sum to 32 (mod 32)
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  // only a rotate when the two shift counts sum to 32 (mod 32)
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // NOTE(review): an unused 'Label done;' local was removed here; the
    // straight-line encoding below takes no branches.
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
8729 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8730 match(Set dst (SubL dst src)); 8731 effect(KILL cr); 8732 ins_cost(200); 8733 format %{ "SUB $dst.lo,$src.lo\n\t" 8734 "SBB $dst.hi,$src.hi" %} 8735 opcode(0x2B, 0x1B); 8736 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8737 ins_pipe( ialu_reg_reg_long ); 8738 %} 8739 8740 // Subtract Long Register with Immediate 8741 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8742 match(Set dst (SubL dst src)); 8743 effect(KILL cr); 8744 format %{ "SUB $dst.lo,$src.lo\n\t" 8745 "SBB $dst.hi,$src.hi" %} 8746 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8747 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8748 ins_pipe( ialu_reg_long ); 8749 %} 8750 8751 // Subtract Long Register with Memory 8752 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8753 match(Set dst (SubL dst (LoadL mem))); 8754 effect(KILL cr); 8755 ins_cost(125); 8756 format %{ "SUB $dst.lo,$mem\n\t" 8757 "SBB $dst.hi,$mem+4" %} 8758 opcode(0x2B, 0x1B); 8759 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8760 ins_pipe( ialu_reg_long_mem ); 8761 %} 8762 8763 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8764 match(Set dst (SubL zero dst)); 8765 effect(KILL cr); 8766 ins_cost(300); 8767 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8768 ins_encode( neg_long(dst) ); 8769 ins_pipe( ialu_reg_reg_long ); 8770 %} 8771 8772 // And Long Register with Register 8773 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8774 match(Set dst (AndL dst src)); 8775 effect(KILL cr); 8776 format %{ "AND $dst.lo,$src.lo\n\t" 8777 "AND $dst.hi,$src.hi" %} 8778 opcode(0x23,0x23); 8779 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8780 ins_pipe( ialu_reg_reg_long ); 8781 %} 8782 8783 // And Long Register with Immediate 8784 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8785 match(Set dst (AndL dst src)); 8786 effect(KILL 
cr); 8787 format %{ "AND $dst.lo,$src.lo\n\t" 8788 "AND $dst.hi,$src.hi" %} 8789 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8790 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8791 ins_pipe( ialu_reg_long ); 8792 %} 8793 8794 // And Long Register with Memory 8795 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8796 match(Set dst (AndL dst (LoadL mem))); 8797 effect(KILL cr); 8798 ins_cost(125); 8799 format %{ "AND $dst.lo,$mem\n\t" 8800 "AND $dst.hi,$mem+4" %} 8801 opcode(0x23, 0x23); 8802 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8803 ins_pipe( ialu_reg_long_mem ); 8804 %} 8805 8806 // BMI1 instructions 8807 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8808 match(Set dst (AndL (XorL src1 minus_1) src2)); 8809 predicate(UseBMI1Instructions); 8810 effect(KILL cr, TEMP dst); 8811 8812 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 8813 "ANDNL $dst.hi, $src1.hi, $src2.hi" 8814 %} 8815 8816 ins_encode %{ 8817 Register Rdst = $dst$$Register; 8818 Register Rsrc1 = $src1$$Register; 8819 Register Rsrc2 = $src2$$Register; 8820 __ andnl(Rdst, Rsrc1, Rsrc2); 8821 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 8822 %} 8823 ins_pipe(ialu_reg_reg_long); 8824 %} 8825 8826 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 8827 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 8828 predicate(UseBMI1Instructions); 8829 effect(KILL cr, TEMP dst); 8830 8831 ins_cost(125); 8832 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 8833 "ANDNL $dst.hi, $src1.hi, $src2+4" 8834 %} 8835 8836 ins_encode %{ 8837 Register Rdst = $dst$$Register; 8838 Register Rsrc1 = $src1$$Register; 8839 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 8840 8841 __ andnl(Rdst, Rsrc1, $src2$$Address); 8842 __ andnl(HIGH_FROM_LOW(Rdst), 
HIGH_FROM_LOW(Rsrc1), src2_hi); 8843 %} 8844 ins_pipe(ialu_reg_mem); 8845 %} 8846 8847 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 8848 match(Set dst (AndL (SubL imm_zero src) src)); 8849 predicate(UseBMI1Instructions); 8850 effect(KILL cr, TEMP dst); 8851 8852 format %{ "MOVL $dst.hi, 0\n\t" 8853 "BLSIL $dst.lo, $src.lo\n\t" 8854 "JNZ done\n\t" 8855 "BLSIL $dst.hi, $src.hi\n" 8856 "done:" 8857 %} 8858 8859 ins_encode %{ 8860 Label done; 8861 Register Rdst = $dst$$Register; 8862 Register Rsrc = $src$$Register; 8863 __ movl(HIGH_FROM_LOW(Rdst), 0); 8864 __ blsil(Rdst, Rsrc); 8865 __ jccb(Assembler::notZero, done); 8866 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8867 __ bind(done); 8868 %} 8869 ins_pipe(ialu_reg); 8870 %} 8871 8872 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 8873 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 8874 predicate(UseBMI1Instructions); 8875 effect(KILL cr, TEMP dst); 8876 8877 ins_cost(125); 8878 format %{ "MOVL $dst.hi, 0\n\t" 8879 "BLSIL $dst.lo, $src\n\t" 8880 "JNZ done\n\t" 8881 "BLSIL $dst.hi, $src+4\n" 8882 "done:" 8883 %} 8884 8885 ins_encode %{ 8886 Label done; 8887 Register Rdst = $dst$$Register; 8888 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8889 8890 __ movl(HIGH_FROM_LOW(Rdst), 0); 8891 __ blsil(Rdst, $src$$Address); 8892 __ jccb(Assembler::notZero, done); 8893 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 8894 __ bind(done); 8895 %} 8896 ins_pipe(ialu_reg_mem); 8897 %} 8898 8899 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8900 %{ 8901 match(Set dst (XorL (AddL src minus_1) src)); 8902 predicate(UseBMI1Instructions); 8903 effect(KILL cr, TEMP dst); 8904 8905 format %{ "MOVL $dst.hi, 0\n\t" 8906 "BLSMSKL $dst.lo, $src.lo\n\t" 8907 "JNC done\n\t" 8908 "BLSMSKL $dst.hi, $src.hi\n" 8909 "done:" 8910 %} 8911 8912 ins_encode %{ 8913 Label done; 
8914 Register Rdst = $dst$$Register; 8915 Register Rsrc = $src$$Register; 8916 __ movl(HIGH_FROM_LOW(Rdst), 0); 8917 __ blsmskl(Rdst, Rsrc); 8918 __ jccb(Assembler::carryClear, done); 8919 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8920 __ bind(done); 8921 %} 8922 8923 ins_pipe(ialu_reg); 8924 %} 8925 8926 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8927 %{ 8928 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 8929 predicate(UseBMI1Instructions); 8930 effect(KILL cr, TEMP dst); 8931 8932 ins_cost(125); 8933 format %{ "MOVL $dst.hi, 0\n\t" 8934 "BLSMSKL $dst.lo, $src\n\t" 8935 "JNC done\n\t" 8936 "BLSMSKL $dst.hi, $src+4\n" 8937 "done:" 8938 %} 8939 8940 ins_encode %{ 8941 Label done; 8942 Register Rdst = $dst$$Register; 8943 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8944 8945 __ movl(HIGH_FROM_LOW(Rdst), 0); 8946 __ blsmskl(Rdst, $src$$Address); 8947 __ jccb(Assembler::carryClear, done); 8948 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 8949 __ bind(done); 8950 %} 8951 8952 ins_pipe(ialu_reg_mem); 8953 %} 8954 8955 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8956 %{ 8957 match(Set dst (AndL (AddL src minus_1) src) ); 8958 predicate(UseBMI1Instructions); 8959 effect(KILL cr, TEMP dst); 8960 8961 format %{ "MOVL $dst.hi, $src.hi\n\t" 8962 "BLSRL $dst.lo, $src.lo\n\t" 8963 "JNC done\n\t" 8964 "BLSRL $dst.hi, $src.hi\n" 8965 "done:" 8966 %} 8967 8968 ins_encode %{ 8969 Label done; 8970 Register Rdst = $dst$$Register; 8971 Register Rsrc = $src$$Register; 8972 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8973 __ blsrl(Rdst, Rsrc); 8974 __ jccb(Assembler::carryClear, done); 8975 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8976 __ bind(done); 8977 %} 8978 8979 ins_pipe(ialu_reg); 8980 %} 8981 8982 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8983 %{ 8984 match(Set dst (AndL 
(AddL (LoadL src) minus_1) (LoadL src) )); 8985 predicate(UseBMI1Instructions); 8986 effect(KILL cr, TEMP dst); 8987 8988 ins_cost(125); 8989 format %{ "MOVL $dst.hi, $src+4\n\t" 8990 "BLSRL $dst.lo, $src\n\t" 8991 "JNC done\n\t" 8992 "BLSRL $dst.hi, $src+4\n" 8993 "done:" 8994 %} 8995 8996 ins_encode %{ 8997 Label done; 8998 Register Rdst = $dst$$Register; 8999 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9000 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9001 __ blsrl(Rdst, $src$$Address); 9002 __ jccb(Assembler::carryClear, done); 9003 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9004 __ bind(done); 9005 %} 9006 9007 ins_pipe(ialu_reg_mem); 9008 %} 9009 9010 // Or Long Register with Register 9011 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9012 match(Set dst (OrL dst src)); 9013 effect(KILL cr); 9014 format %{ "OR $dst.lo,$src.lo\n\t" 9015 "OR $dst.hi,$src.hi" %} 9016 opcode(0x0B,0x0B); 9017 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9018 ins_pipe( ialu_reg_reg_long ); 9019 %} 9020 9021 // Or Long Register with Immediate 9022 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9023 match(Set dst (OrL dst src)); 9024 effect(KILL cr); 9025 format %{ "OR $dst.lo,$src.lo\n\t" 9026 "OR $dst.hi,$src.hi" %} 9027 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9028 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9029 ins_pipe( ialu_reg_long ); 9030 %} 9031 9032 // Or Long Register with Memory 9033 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9034 match(Set dst (OrL dst (LoadL mem))); 9035 effect(KILL cr); 9036 ins_cost(125); 9037 format %{ "OR $dst.lo,$mem\n\t" 9038 "OR $dst.hi,$mem+4" %} 9039 opcode(0x0B,0x0B); 9040 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9041 ins_pipe( ialu_reg_long_mem ); 9042 %} 9043 9044 // Xor Long Register with Register 9045 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9046 
match(Set dst (XorL dst src)); 9047 effect(KILL cr); 9048 format %{ "XOR $dst.lo,$src.lo\n\t" 9049 "XOR $dst.hi,$src.hi" %} 9050 opcode(0x33,0x33); 9051 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9052 ins_pipe( ialu_reg_reg_long ); 9053 %} 9054 9055 // Xor Long Register with Immediate -1 9056 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9057 match(Set dst (XorL dst imm)); 9058 format %{ "NOT $dst.lo\n\t" 9059 "NOT $dst.hi" %} 9060 ins_encode %{ 9061 __ notl($dst$$Register); 9062 __ notl(HIGH_FROM_LOW($dst$$Register)); 9063 %} 9064 ins_pipe( ialu_reg_long ); 9065 %} 9066 9067 // Xor Long Register with Immediate 9068 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9069 match(Set dst (XorL dst src)); 9070 effect(KILL cr); 9071 format %{ "XOR $dst.lo,$src.lo\n\t" 9072 "XOR $dst.hi,$src.hi" %} 9073 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9074 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9075 ins_pipe( ialu_reg_long ); 9076 %} 9077 9078 // Xor Long Register with Memory 9079 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9080 match(Set dst (XorL dst (LoadL mem))); 9081 effect(KILL cr); 9082 ins_cost(125); 9083 format %{ "XOR $dst.lo,$mem\n\t" 9084 "XOR $dst.hi,$mem+4" %} 9085 opcode(0x33,0x33); 9086 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9087 ins_pipe( ialu_reg_long_mem ); 9088 %} 9089 9090 // Shift Left Long by 1 9091 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9092 predicate(UseNewLongLShift); 9093 match(Set dst (LShiftL dst cnt)); 9094 effect(KILL cr); 9095 ins_cost(100); 9096 format %{ "ADD $dst.lo,$dst.lo\n\t" 9097 "ADC $dst.hi,$dst.hi" %} 9098 ins_encode %{ 9099 __ addl($dst$$Register,$dst$$Register); 9100 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9101 %} 9102 ins_pipe( ialu_reg_long ); 9103 %} 9104 9105 // Shift Left Long by 2 9106 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9107 
predicate(UseNewLongLShift); 9108 match(Set dst (LShiftL dst cnt)); 9109 effect(KILL cr); 9110 ins_cost(100); 9111 format %{ "ADD $dst.lo,$dst.lo\n\t" 9112 "ADC $dst.hi,$dst.hi\n\t" 9113 "ADD $dst.lo,$dst.lo\n\t" 9114 "ADC $dst.hi,$dst.hi" %} 9115 ins_encode %{ 9116 __ addl($dst$$Register,$dst$$Register); 9117 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9118 __ addl($dst$$Register,$dst$$Register); 9119 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9120 %} 9121 ins_pipe( ialu_reg_long ); 9122 %} 9123 9124 // Shift Left Long by 3 9125 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9126 predicate(UseNewLongLShift); 9127 match(Set dst (LShiftL dst cnt)); 9128 effect(KILL cr); 9129 ins_cost(100); 9130 format %{ "ADD $dst.lo,$dst.lo\n\t" 9131 "ADC $dst.hi,$dst.hi\n\t" 9132 "ADD $dst.lo,$dst.lo\n\t" 9133 "ADC $dst.hi,$dst.hi\n\t" 9134 "ADD $dst.lo,$dst.lo\n\t" 9135 "ADC $dst.hi,$dst.hi" %} 9136 ins_encode %{ 9137 __ addl($dst$$Register,$dst$$Register); 9138 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9139 __ addl($dst$$Register,$dst$$Register); 9140 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9141 __ addl($dst$$Register,$dst$$Register); 9142 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9143 %} 9144 ins_pipe( ialu_reg_long ); 9145 %} 9146 9147 // Shift Left Long by 1-31 9148 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9149 match(Set dst (LShiftL dst cnt)); 9150 effect(KILL cr); 9151 ins_cost(200); 9152 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9153 "SHL $dst.lo,$cnt" %} 9154 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9155 ins_encode( move_long_small_shift(dst,cnt) ); 9156 ins_pipe( ialu_reg_long ); 9157 %} 9158 9159 // Shift Left Long by 32-63 9160 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9161 match(Set dst (LShiftL dst cnt)); 9162 effect(KILL cr); 9163 ins_cost(300); 9164 
format %{ "MOV $dst.hi,$dst.lo\n" 9165 "\tSHL $dst.hi,$cnt-32\n" 9166 "\tXOR $dst.lo,$dst.lo" %} 9167 opcode(0xC1, 0x4); /* C1 /4 ib */ 9168 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9169 ins_pipe( ialu_reg_long ); 9170 %} 9171 9172 // Shift Left Long by variable 9173 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9174 match(Set dst (LShiftL dst shift)); 9175 effect(KILL cr); 9176 ins_cost(500+200); 9177 size(17); 9178 format %{ "TEST $shift,32\n\t" 9179 "JEQ,s small\n\t" 9180 "MOV $dst.hi,$dst.lo\n\t" 9181 "XOR $dst.lo,$dst.lo\n" 9182 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9183 "SHL $dst.lo,$shift" %} 9184 ins_encode( shift_left_long( dst, shift ) ); 9185 ins_pipe( pipe_slow ); 9186 %} 9187 9188 // Shift Right Long by 1-31 9189 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9190 match(Set dst (URShiftL dst cnt)); 9191 effect(KILL cr); 9192 ins_cost(200); 9193 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9194 "SHR $dst.hi,$cnt" %} 9195 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9196 ins_encode( move_long_small_shift(dst,cnt) ); 9197 ins_pipe( ialu_reg_long ); 9198 %} 9199 9200 // Shift Right Long by 32-63 9201 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9202 match(Set dst (URShiftL dst cnt)); 9203 effect(KILL cr); 9204 ins_cost(300); 9205 format %{ "MOV $dst.lo,$dst.hi\n" 9206 "\tSHR $dst.lo,$cnt-32\n" 9207 "\tXOR $dst.hi,$dst.hi" %} 9208 opcode(0xC1, 0x5); /* C1 /5 ib */ 9209 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9210 ins_pipe( ialu_reg_long ); 9211 %} 9212 9213 // Shift Right Long by variable 9214 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9215 match(Set dst (URShiftL dst shift)); 9216 effect(KILL cr); 9217 ins_cost(600); 9218 size(17); 9219 format %{ "TEST $shift,32\n\t" 9220 "JEQ,s small\n\t" 9221 "MOV $dst.lo,$dst.hi\n\t" 9222 "XOR $dst.hi,$dst.hi\n" 9223 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9224 "SHR $dst.hi,$shift" %} 9225 ins_encode( 
shift_right_long( dst, shift ) ); 9226 ins_pipe( pipe_slow ); 9227 %} 9228 9229 // Shift Right Long by 1-31 9230 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9231 match(Set dst (RShiftL dst cnt)); 9232 effect(KILL cr); 9233 ins_cost(200); 9234 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9235 "SAR $dst.hi,$cnt" %} 9236 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9237 ins_encode( move_long_small_shift(dst,cnt) ); 9238 ins_pipe( ialu_reg_long ); 9239 %} 9240 9241 // Shift Right Long by 32-63 9242 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9243 match(Set dst (RShiftL dst cnt)); 9244 effect(KILL cr); 9245 ins_cost(300); 9246 format %{ "MOV $dst.lo,$dst.hi\n" 9247 "\tSAR $dst.lo,$cnt-32\n" 9248 "\tSAR $dst.hi,31" %} 9249 opcode(0xC1, 0x7); /* C1 /7 ib */ 9250 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9251 ins_pipe( ialu_reg_long ); 9252 %} 9253 9254 // Shift Right arithmetic Long by variable 9255 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9256 match(Set dst (RShiftL dst shift)); 9257 effect(KILL cr); 9258 ins_cost(600); 9259 size(18); 9260 format %{ "TEST $shift,32\n\t" 9261 "JEQ,s small\n\t" 9262 "MOV $dst.lo,$dst.hi\n\t" 9263 "SAR $dst.hi,31\n" 9264 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9265 "SAR $dst.hi,$shift" %} 9266 ins_encode( shift_right_arith_long( dst, shift ) ); 9267 ins_pipe( pipe_slow ); 9268 %} 9269 9270 9271 //----------Double Instructions------------------------------------------------ 9272 // Double Math 9273 9274 // Compare & branch 9275 9276 // P6 version of float compare, sets condition codes in EFLAGS 9277 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9278 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9279 match(Set cr (CmpD src1 src2)); 9280 effect(KILL rax); 9281 ins_cost(150); 9282 format %{ "FLD $src1\n\t" 9283 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9284 "JNP exit\n\t" 9285 "MOV ah,1 // saw a NaN, set CF\n\t" 9286 
"SAHF\n" 9287 "exit:\tNOP // avoid branch to branch" %} 9288 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9289 ins_encode( Push_Reg_DPR(src1), 9290 OpcP, RegOpc(src2), 9291 cmpF_P6_fixup ); 9292 ins_pipe( pipe_slow ); 9293 %} 9294 9295 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9296 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9297 match(Set cr (CmpD src1 src2)); 9298 ins_cost(150); 9299 format %{ "FLD $src1\n\t" 9300 "FUCOMIP ST,$src2 // P6 instruction" %} 9301 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9302 ins_encode( Push_Reg_DPR(src1), 9303 OpcP, RegOpc(src2)); 9304 ins_pipe( pipe_slow ); 9305 %} 9306 9307 // Compare & branch 9308 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9309 predicate(UseSSE<=1); 9310 match(Set cr (CmpD src1 src2)); 9311 effect(KILL rax); 9312 ins_cost(200); 9313 format %{ "FLD $src1\n\t" 9314 "FCOMp $src2\n\t" 9315 "FNSTSW AX\n\t" 9316 "TEST AX,0x400\n\t" 9317 "JZ,s flags\n\t" 9318 "MOV AH,1\t# unordered treat as LT\n" 9319 "flags:\tSAHF" %} 9320 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9321 ins_encode( Push_Reg_DPR(src1), 9322 OpcP, RegOpc(src2), 9323 fpu_flags); 9324 ins_pipe( pipe_slow ); 9325 %} 9326 9327 // Compare vs zero into -1,0,1 9328 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9329 predicate(UseSSE<=1); 9330 match(Set dst (CmpD3 src1 zero)); 9331 effect(KILL cr, KILL rax); 9332 ins_cost(280); 9333 format %{ "FTSTD $dst,$src1" %} 9334 opcode(0xE4, 0xD9); 9335 ins_encode( Push_Reg_DPR(src1), 9336 OpcS, OpcP, PopFPU, 9337 CmpF_Result(dst)); 9338 ins_pipe( pipe_slow ); 9339 %} 9340 9341 // Compare into -1,0,1 9342 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9343 predicate(UseSSE<=1); 9344 match(Set dst (CmpD3 src1 src2)); 9345 effect(KILL cr, KILL rax); 9346 ins_cost(300); 9347 format %{ "FCMPD $dst,$src1,$src2" %} 9348 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9349 ins_encode( 
Push_Reg_DPR(src1), 9350 OpcP, RegOpc(src2), 9351 CmpF_Result(dst)); 9352 ins_pipe( pipe_slow ); 9353 %} 9354 9355 // float compare and set condition codes in EFLAGS by XMM regs 9356 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9357 predicate(UseSSE>=2); 9358 match(Set cr (CmpD src1 src2)); 9359 ins_cost(145); 9360 format %{ "UCOMISD $src1,$src2\n\t" 9361 "JNP,s exit\n\t" 9362 "PUSHF\t# saw NaN, set CF\n\t" 9363 "AND [rsp], #0xffffff2b\n\t" 9364 "POPF\n" 9365 "exit:" %} 9366 ins_encode %{ 9367 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9368 emit_cmpfp_fixup(_masm); 9369 %} 9370 ins_pipe( pipe_slow ); 9371 %} 9372 9373 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9374 predicate(UseSSE>=2); 9375 match(Set cr (CmpD src1 src2)); 9376 ins_cost(100); 9377 format %{ "UCOMISD $src1,$src2" %} 9378 ins_encode %{ 9379 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9380 %} 9381 ins_pipe( pipe_slow ); 9382 %} 9383 9384 // float compare and set condition codes in EFLAGS by XMM regs 9385 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9386 predicate(UseSSE>=2); 9387 match(Set cr (CmpD src1 (LoadD src2))); 9388 ins_cost(145); 9389 format %{ "UCOMISD $src1,$src2\n\t" 9390 "JNP,s exit\n\t" 9391 "PUSHF\t# saw NaN, set CF\n\t" 9392 "AND [rsp], #0xffffff2b\n\t" 9393 "POPF\n" 9394 "exit:" %} 9395 ins_encode %{ 9396 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9397 emit_cmpfp_fixup(_masm); 9398 %} 9399 ins_pipe( pipe_slow ); 9400 %} 9401 9402 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9403 predicate(UseSSE>=2); 9404 match(Set cr (CmpD src1 (LoadD src2))); 9405 ins_cost(100); 9406 format %{ "UCOMISD $src1,$src2" %} 9407 ins_encode %{ 9408 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9409 %} 9410 ins_pipe( pipe_slow ); 9411 %} 9412 9413 // Compare into -1,0,1 in XMM 9414 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9415 predicate(UseSSE>=2); 9416 match(Set dst (CmpD3 src1 src2)); 
9417 effect(KILL cr); 9418 ins_cost(255); 9419 format %{ "UCOMISD $src1, $src2\n\t" 9420 "MOV $dst, #-1\n\t" 9421 "JP,s done\n\t" 9422 "JB,s done\n\t" 9423 "SETNE $dst\n\t" 9424 "MOVZB $dst, $dst\n" 9425 "done:" %} 9426 ins_encode %{ 9427 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9428 emit_cmpfp3(_masm, $dst$$Register); 9429 %} 9430 ins_pipe( pipe_slow ); 9431 %} 9432 9433 // Compare into -1,0,1 in XMM and memory 9434 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9435 predicate(UseSSE>=2); 9436 match(Set dst (CmpD3 src1 (LoadD src2))); 9437 effect(KILL cr); 9438 ins_cost(275); 9439 format %{ "UCOMISD $src1, $src2\n\t" 9440 "MOV $dst, #-1\n\t" 9441 "JP,s done\n\t" 9442 "JB,s done\n\t" 9443 "SETNE $dst\n\t" 9444 "MOVZB $dst, $dst\n" 9445 "done:" %} 9446 ins_encode %{ 9447 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9448 emit_cmpfp3(_masm, $dst$$Register); 9449 %} 9450 ins_pipe( pipe_slow ); 9451 %} 9452 9453 9454 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9455 predicate (UseSSE <=1); 9456 match(Set dst (SubD dst src)); 9457 9458 format %{ "FLD $src\n\t" 9459 "DSUBp $dst,ST" %} 9460 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9461 ins_cost(150); 9462 ins_encode( Push_Reg_DPR(src), 9463 OpcP, RegOpc(dst) ); 9464 ins_pipe( fpu_reg_reg ); 9465 %} 9466 9467 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9468 predicate (UseSSE <=1); 9469 match(Set dst (RoundDouble (SubD src1 src2))); 9470 ins_cost(250); 9471 9472 format %{ "FLD $src2\n\t" 9473 "DSUB ST,$src1\n\t" 9474 "FSTP_D $dst\t# D-round" %} 9475 opcode(0xD8, 0x5); 9476 ins_encode( Push_Reg_DPR(src2), 9477 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9478 ins_pipe( fpu_mem_reg_reg ); 9479 %} 9480 9481 9482 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9483 predicate (UseSSE <=1); 9484 match(Set dst (SubD dst (LoadD src))); 9485 ins_cost(150); 9486 9487 format %{ "FLD $src\n\t" 9488 "DSUBp $dst,ST" %} 9489 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9490 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9491 OpcP, RegOpc(dst) ); 9492 ins_pipe( fpu_reg_mem ); 9493 %} 9494 9495 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9496 predicate (UseSSE<=1); 9497 match(Set dst (AbsD src)); 9498 ins_cost(100); 9499 format %{ "FABS" %} 9500 opcode(0xE1, 0xD9); 9501 ins_encode( OpcS, OpcP ); 9502 ins_pipe( fpu_reg_reg ); 9503 %} 9504 9505 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9506 predicate(UseSSE<=1); 9507 match(Set dst (NegD src)); 9508 ins_cost(100); 9509 format %{ "FCHS" %} 9510 opcode(0xE0, 0xD9); 9511 ins_encode( OpcS, OpcP ); 9512 ins_pipe( fpu_reg_reg ); 9513 %} 9514 9515 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9516 predicate(UseSSE<=1); 9517 match(Set dst (AddD dst src)); 9518 format %{ "FLD $src\n\t" 9519 "DADD $dst,ST" %} 9520 size(4); 9521 ins_cost(150); 9522 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9523 ins_encode( Push_Reg_DPR(src), 9524 OpcP, RegOpc(dst) ); 9525 ins_pipe( fpu_reg_reg ); 9526 %} 9527 9528 9529 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9530 predicate(UseSSE<=1); 9531 match(Set dst (RoundDouble (AddD src1 src2))); 9532 ins_cost(250); 9533 9534 format %{ "FLD $src2\n\t" 9535 "DADD ST,$src1\n\t" 9536 "FSTP_D $dst\t# D-round" %} 9537 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9538 ins_encode( Push_Reg_DPR(src2), 9539 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9540 ins_pipe( fpu_mem_reg_reg ); 9541 %} 9542 9543 9544 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9545 predicate(UseSSE<=1); 9546 match(Set dst (AddD dst (LoadD src))); 9547 ins_cost(150); 9548 9549 format %{ "FLD $src\n\t" 9550 "DADDp $dst,ST" %} 9551 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9552 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9553 OpcP, RegOpc(dst) ); 9554 ins_pipe( fpu_reg_mem ); 9555 %} 9556 9557 // add-to-memory 9558 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9559 predicate(UseSSE<=1); 9560 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9561 ins_cost(150); 9562 9563 format %{ "FLD_D $dst\n\t" 9564 "DADD ST,$src\n\t" 9565 "FST_D $dst" %} 9566 opcode(0xDD, 0x0); 9567 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9568 Opcode(0xD8), RegOpc(src), 9569 set_instruction_start, 9570 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9571 ins_pipe( fpu_reg_mem ); 9572 %} 9573 9574 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9575 predicate(UseSSE<=1); 9576 match(Set dst (AddD dst con)); 9577 ins_cost(125); 9578 format %{ "FLD1\n\t" 9579 "DADDp $dst,ST" %} 9580 ins_encode %{ 9581 __ fld1(); 9582 __ faddp($dst$$reg); 9583 %} 9584 ins_pipe(fpu_reg); 9585 %} 9586 9587 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9588 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9589 match(Set dst (AddD dst con)); 9590 ins_cost(200); 9591 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9592 "DADDp $dst,ST" %} 9593 ins_encode %{ 9594 __ fld_d($constantaddress($con)); 9595 __ faddp($dst$$reg); 9596 %} 9597 ins_pipe(fpu_reg_mem); 9598 %} 9599 9600 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9601 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9602 match(Set dst (RoundDouble (AddD src con))); 9603 ins_cost(200); 9604 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9605 "DADD ST,$src\n\t" 9606 "FSTP_D $dst\t# D-round" %} 9607 ins_encode %{ 9608 __ fld_d($constantaddress($con)); 9609 __ fadd($src$$reg); 9610 __ fstp_d(Address(rsp, $dst$$disp)); 9611 %} 9612 ins_pipe(fpu_mem_reg_con); 9613 %} 9614 9615 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9616 predicate(UseSSE<=1); 9617 match(Set dst (MulD dst src)); 9618 format %{ "FLD $src\n\t" 9619 "DMULp $dst,ST" %} 9620 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9621 ins_cost(150); 9622 ins_encode( Push_Reg_DPR(src), 9623 OpcP, RegOpc(dst) ); 9624 ins_pipe( 
fpu_reg_reg ); 9625 %} 9626 9627 // Strict FP instruction biases argument before multiply then 9628 // biases result to avoid double rounding of subnormals. 9629 // 9630 // scale arg1 by multiplying arg1 by 2^(-15360) 9631 // load arg2 9632 // multiply scaled arg1 by arg2 9633 // rescale product by 2^(15360) 9634 // 9635 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9636 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9637 match(Set dst (MulD dst src)); 9638 ins_cost(1); // Select this instruction for all strict FP double multiplies 9639 9640 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9641 "DMULp $dst,ST\n\t" 9642 "FLD $src\n\t" 9643 "DMULp $dst,ST\n\t" 9644 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9645 "DMULp $dst,ST\n\t" %} 9646 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9647 ins_encode( strictfp_bias1(dst), 9648 Push_Reg_DPR(src), 9649 OpcP, RegOpc(dst), 9650 strictfp_bias2(dst) ); 9651 ins_pipe( fpu_reg_reg ); 9652 %} 9653 9654 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9655 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9656 match(Set dst (MulD dst con)); 9657 ins_cost(200); 9658 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9659 "DMULp $dst,ST" %} 9660 ins_encode %{ 9661 __ fld_d($constantaddress($con)); 9662 __ fmulp($dst$$reg); 9663 %} 9664 ins_pipe(fpu_reg_mem); 9665 %} 9666 9667 9668 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9669 predicate( UseSSE<=1 ); 9670 match(Set dst (MulD dst (LoadD src))); 9671 ins_cost(200); 9672 format %{ "FLD_D $src\n\t" 9673 "DMULp $dst,ST" %} 9674 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9675 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9676 OpcP, RegOpc(dst) ); 9677 ins_pipe( fpu_reg_mem ); 9678 %} 9679 9680 // 9681 // Cisc-alternate to reg-reg multiply 9682 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9683 predicate( UseSSE<=1 ); 9684 match(Set dst (MulD src (LoadD mem))); 9685 ins_cost(250); 9686 format %{ "FLD_D $mem\n\t" 9687 "DMUL ST,$src\n\t" 9688 "FSTP_D $dst" %} 9689 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9690 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9691 OpcReg_FPR(src), 9692 Pop_Reg_DPR(dst) ); 9693 ins_pipe( fpu_reg_reg_mem ); 9694 %} 9695 9696 9697 // MACRO3 -- addDPR a mulDPR 9698 // This instruction is a '2-address' instruction in that the result goes 9699 // back to src2. This eliminates a move from the macro; possibly the 9700 // register allocator will have to add it back (and maybe not). 9701 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9702 predicate( UseSSE<=1 ); 9703 match(Set src2 (AddD (MulD src0 src1) src2)); 9704 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9705 "DMUL ST,$src1\n\t" 9706 "DADDp $src2,ST" %} 9707 ins_cost(250); 9708 opcode(0xDD); /* LoadD DD /0 */ 9709 ins_encode( Push_Reg_FPR(src0), 9710 FMul_ST_reg(src1), 9711 FAddP_reg_ST(src2) ); 9712 ins_pipe( fpu_reg_reg_reg ); 9713 %} 9714 9715 9716 // MACRO3 -- subDPR a mulDPR 9717 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9718 predicate( UseSSE<=1 ); 9719 match(Set src2 (SubD (MulD src0 src1) src2)); 9720 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9721 "DMUL ST,$src1\n\t" 9722 "DSUBRp $src2,ST" %} 9723 ins_cost(250); 9724 ins_encode( Push_Reg_FPR(src0), 9725 FMul_ST_reg(src1), 9726 Opcode(0xDE), Opc_plus(0xE0,src2)); 9727 ins_pipe( fpu_reg_reg_reg ); 9728 %} 9729 9730 9731 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9732 predicate( UseSSE<=1 ); 9733 match(Set dst (DivD dst src)); 9734 9735 format %{ "FLD $src\n\t" 9736 "FDIVp $dst,ST" %} 9737 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9738 ins_cost(150); 9739 ins_encode( Push_Reg_DPR(src), 9740 OpcP, RegOpc(dst) ); 9741 ins_pipe( fpu_reg_reg ); 9742 %} 9743 9744 // Strict FP instruction biases argument before division then 9745 // biases 
result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // FIX: an instruct may carry only ONE predicate clause.  The original had a
  // stray duplicate "predicate (UseSSE<=1);" ahead of the match rule in
  // addition to the full strict-fp predicate below; the bare UseSSE<=1 form
  // would select this subnormal-bias sequence for EVERY x87 double divide,
  // not just divides inside strictfp methods.  Keep only the combined
  // predicate, mirroring strictfp_mulDPR_reg above.
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all strict FP double divides

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Non-strict double divide, rounded through memory (D-round).
instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD $src1\n\t"
            "FDIV ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Double remainder on the x87 stack (UseSSE<=1 path).
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
9803 effect(KILL rax, KILL cr); 9804 9805 format %{ "SUB ESP,8\t # DMOD\n" 9806 "\tMOVSD [ESP+0],$src1\n" 9807 "\tFLD_D [ESP+0]\n" 9808 "\tMOVSD [ESP+0],$src0\n" 9809 "\tFLD_D [ESP+0]\n" 9810 "loop:\tFPREM\n" 9811 "\tFWAIT\n" 9812 "\tFNSTSW AX\n" 9813 "\tSAHF\n" 9814 "\tJP loop\n" 9815 "\tFSTP_D [ESP+0]\n" 9816 "\tMOVSD $dst,[ESP+0]\n" 9817 "\tADD ESP,8\n" 9818 "\tFSTP ST0\t # Restore FPU Stack" 9819 %} 9820 ins_cost(250); 9821 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9822 ins_pipe( pipe_slow ); 9823 %} 9824 9825 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ 9826 predicate (UseSSE<=1); 9827 match(Set dst(TanD src)); 9828 format %{ "DTAN $dst" %} 9829 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan 9830 Opcode(0xDD), Opcode(0xD8)); // fstp st 9831 ins_pipe( pipe_slow ); 9832 %} 9833 9834 instruct tanD_reg(regD dst, eFlagsReg cr) %{ 9835 predicate (UseSSE>=2); 9836 match(Set dst(TanD dst)); 9837 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9838 format %{ "DTAN $dst" %} 9839 ins_encode( Push_SrcD(dst), 9840 Opcode(0xD9), Opcode(0xF2), // fptan 9841 Opcode(0xDD), Opcode(0xD8), // fstp st 9842 Push_ResultD(dst) ); 9843 ins_pipe( pipe_slow ); 9844 %} 9845 9846 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9847 predicate (UseSSE<=1); 9848 match(Set dst(AtanD dst src)); 9849 format %{ "DATA $dst,$src" %} 9850 opcode(0xD9, 0xF3); 9851 ins_encode( Push_Reg_DPR(src), 9852 OpcP, OpcS, RegOpc(dst) ); 9853 ins_pipe( pipe_slow ); 9854 %} 9855 9856 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9857 predicate (UseSSE>=2); 9858 match(Set dst(AtanD dst src)); 9859 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9860 format %{ "DATA $dst,$src" %} 9861 opcode(0xD9, 0xF3); 9862 ins_encode( Push_SrcD(src), 9863 OpcP, OpcS, Push_ResultD(dst) ); 9864 ins_pipe( pipe_slow ); 9865 %} 9866 9867 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9868 predicate (UseSSE<=1); 9869 match(Set dst (SqrtD 
src)); 9870 format %{ "DSQRT $dst,$src" %} 9871 opcode(0xFA, 0xD9); 9872 ins_encode( Push_Reg_DPR(src), 9873 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9874 ins_pipe( pipe_slow ); 9875 %} 9876 9877 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ 9878 predicate (UseSSE<=1); 9879 // The source Double operand on FPU stack 9880 match(Set dst (Log10D src)); 9881 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9882 // fxch ; swap ST(0) with ST(1) 9883 // fyl2x ; compute log_10(2) * log_2(x) 9884 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9885 "FXCH \n\t" 9886 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9887 %} 9888 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9889 Opcode(0xD9), Opcode(0xC9), // fxch 9890 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9891 9892 ins_pipe( pipe_slow ); 9893 %} 9894 9895 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ 9896 predicate (UseSSE>=2); 9897 effect(KILL cr); 9898 match(Set dst (Log10D src)); 9899 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9900 // fyl2x ; compute log_10(2) * log_2(x) 9901 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9902 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9903 %} 9904 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9905 Push_SrcD(src), 9906 Opcode(0xD9), Opcode(0xF1), // fyl2x 9907 Push_ResultD(dst)); 9908 9909 ins_pipe( pipe_slow ); 9910 %} 9911 9912 //-------------Float Instructions------------------------------- 9913 // Float Math 9914 9915 // Code for float compare: 9916 // fcompp(); 9917 // fwait(); fnstsw_ax(); 9918 // sahf(); 9919 // movl(dst, unordered_result); 9920 // jcc(Assembler::parity, exit); 9921 // movl(dst, less_result); 9922 // jcc(Assembler::below, exit); 9923 // movl(dst, equal_result); 9924 // jcc(Assembler::equal, exit); 9925 // movl(dst, greater_result); 9926 // exit: 9927 9928 // P6 version of float compare, sets condition codes in EFLAGS 9929 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9930 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9931 
match(Set cr (CmpF src1 src2)); 9932 effect(KILL rax); 9933 ins_cost(150); 9934 format %{ "FLD $src1\n\t" 9935 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9936 "JNP exit\n\t" 9937 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 9938 "SAHF\n" 9939 "exit:\tNOP // avoid branch to branch" %} 9940 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9941 ins_encode( Push_Reg_DPR(src1), 9942 OpcP, RegOpc(src2), 9943 cmpF_P6_fixup ); 9944 ins_pipe( pipe_slow ); 9945 %} 9946 9947 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 9948 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9949 match(Set cr (CmpF src1 src2)); 9950 ins_cost(100); 9951 format %{ "FLD $src1\n\t" 9952 "FUCOMIP ST,$src2 // P6 instruction" %} 9953 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9954 ins_encode( Push_Reg_DPR(src1), 9955 OpcP, RegOpc(src2)); 9956 ins_pipe( pipe_slow ); 9957 %} 9958 9959 9960 // Compare & branch 9961 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9962 predicate(UseSSE == 0); 9963 match(Set cr (CmpF src1 src2)); 9964 effect(KILL rax); 9965 ins_cost(200); 9966 format %{ "FLD $src1\n\t" 9967 "FCOMp $src2\n\t" 9968 "FNSTSW AX\n\t" 9969 "TEST AX,0x400\n\t" 9970 "JZ,s flags\n\t" 9971 "MOV AH,1\t# unordered treat as LT\n" 9972 "flags:\tSAHF" %} 9973 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9974 ins_encode( Push_Reg_DPR(src1), 9975 OpcP, RegOpc(src2), 9976 fpu_flags); 9977 ins_pipe( pipe_slow ); 9978 %} 9979 9980 // Compare vs zero into -1,0,1 9981 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9982 predicate(UseSSE == 0); 9983 match(Set dst (CmpF3 src1 zero)); 9984 effect(KILL cr, KILL rax); 9985 ins_cost(280); 9986 format %{ "FTSTF $dst,$src1" %} 9987 opcode(0xE4, 0xD9); 9988 ins_encode( Push_Reg_DPR(src1), 9989 OpcS, OpcP, PopFPU, 9990 CmpF_Result(dst)); 9991 ins_pipe( pipe_slow ); 9992 %} 9993 9994 // Compare into -1,0,1 9995 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, 
eFlagsReg cr) %{ 9996 predicate(UseSSE == 0); 9997 match(Set dst (CmpF3 src1 src2)); 9998 effect(KILL cr, KILL rax); 9999 ins_cost(300); 10000 format %{ "FCMPF $dst,$src1,$src2" %} 10001 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10002 ins_encode( Push_Reg_DPR(src1), 10003 OpcP, RegOpc(src2), 10004 CmpF_Result(dst)); 10005 ins_pipe( pipe_slow ); 10006 %} 10007 10008 // float compare and set condition codes in EFLAGS by XMM regs 10009 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10010 predicate(UseSSE>=1); 10011 match(Set cr (CmpF src1 src2)); 10012 ins_cost(145); 10013 format %{ "UCOMISS $src1,$src2\n\t" 10014 "JNP,s exit\n\t" 10015 "PUSHF\t# saw NaN, set CF\n\t" 10016 "AND [rsp], #0xffffff2b\n\t" 10017 "POPF\n" 10018 "exit:" %} 10019 ins_encode %{ 10020 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10021 emit_cmpfp_fixup(_masm); 10022 %} 10023 ins_pipe( pipe_slow ); 10024 %} 10025 10026 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10027 predicate(UseSSE>=1); 10028 match(Set cr (CmpF src1 src2)); 10029 ins_cost(100); 10030 format %{ "UCOMISS $src1,$src2" %} 10031 ins_encode %{ 10032 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10033 %} 10034 ins_pipe( pipe_slow ); 10035 %} 10036 10037 // float compare and set condition codes in EFLAGS by XMM regs 10038 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10039 predicate(UseSSE>=1); 10040 match(Set cr (CmpF src1 (LoadF src2))); 10041 ins_cost(165); 10042 format %{ "UCOMISS $src1,$src2\n\t" 10043 "JNP,s exit\n\t" 10044 "PUSHF\t# saw NaN, set CF\n\t" 10045 "AND [rsp], #0xffffff2b\n\t" 10046 "POPF\n" 10047 "exit:" %} 10048 ins_encode %{ 10049 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10050 emit_cmpfp_fixup(_masm); 10051 %} 10052 ins_pipe( pipe_slow ); 10053 %} 10054 10055 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10056 predicate(UseSSE>=1); 10057 match(Set cr (CmpF src1 (LoadF src2))); 10058 ins_cost(100); 10059 format %{ "UCOMISS 
$src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
// (result: -1 if less or unordered, 0 if equal, +1 if greater; the flag-to-int
// materialization is done by the emit_cmpfp3 helper)
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
// (dst is a float stack slot: the FSTP_S done by Pop_Mem_FPR rounds the
// higher-precision x87 result down to 24-bit float precision)
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// FABS/FCHS operate on the x87 top-of-stack, hence the regFPR1 operand constraint.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
// '2-address' form: the AddF result is written back into src2.
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE operands are bounced through the stack so the x87 FPREM loop can do the
// remainder; FPREM is re-executed until the FPU status word (copied to EFLAGS
// via FNSTSW/SAHF) reports the reduction complete, then the result is moved back.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all Alpha sorted. Please keep it that way!

// Store-and-reload forces a value held at higher x87 precision down to its
// in-memory format (the "round" in the format comments).
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is not already at the top of the x87 stack, load it there first
    // (FLD ST(i-1)); FPR1 can be stored directly without popping.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// FIST stores the sentinel 0x80000000 on overflow/NaN; seeing it routes the
// conversion to the d2i_wrapper stub for the Java-mandated corner-case handling.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    // CVTTSD2SI yields 0x80000000 on overflow/NaN; fall into the stub then.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x80000000:00000000 is the FIST overflow/NaN sentinel -> slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
// Fast path is CVTTSS2SI; the 0x80000000 sentinel (overflow/NaN) routes the
// value through the x87 stack to the d2i_wrapper stub.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x80000000:00000000 is the FIST overflow/NaN sentinel -> slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Int-to-double conversions: x87 FILD forms for UseSSE<=1,
// CVTSI2SD / CVTDQ2PD forms for SSE2.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// Predicate recognizes (ConvI2F (AndI x 255)): the value fits in 8 bits,
// so the float result is exact and needs no 24-bit rounding spill.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long (32-bit register pairs: lo copy + hi = sign bits).
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// NOTE(review): unlike convL2F_reg above, this x87 form carries no UseSSE
// predicate — presumably it is selected when the XMM variant does not apply;
// confirm against ADLC match-priority rules.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Truncating long-to-int: just copy the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Raw bit moves between float and int views (no value conversion).
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# 
MoveI2F_reg_stack" %} 11200 ins_encode %{ 11201 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11202 %} 11203 ins_pipe( ialu_mem_reg ); 11204 %} 11205 11206 11207 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11208 predicate(UseSSE==0); 11209 match(Set dst (MoveI2F src)); 11210 effect(DEF dst, USE src); 11211 11212 ins_cost(125); 11213 format %{ "FLD_S $src\n\t" 11214 "FSTP $dst\t# MoveI2F_stack_reg" %} 11215 opcode(0xD9); /* D9 /0, FLD m32real */ 11216 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11217 Pop_Reg_FPR(dst) ); 11218 ins_pipe( fpu_reg_mem ); 11219 %} 11220 11221 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11222 predicate(UseSSE>=1); 11223 match(Set dst (MoveI2F src)); 11224 effect( DEF dst, USE src ); 11225 11226 ins_cost(95); 11227 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11228 ins_encode %{ 11229 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11230 %} 11231 ins_pipe( pipe_slow ); 11232 %} 11233 11234 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11235 predicate(UseSSE>=2); 11236 match(Set dst (MoveI2F src)); 11237 effect( DEF dst, USE src ); 11238 11239 ins_cost(85); 11240 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11241 ins_encode %{ 11242 __ movdl($dst$$XMMRegister, $src$$Register); 11243 %} 11244 ins_pipe( pipe_slow ); 11245 %} 11246 11247 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11248 match(Set dst (MoveD2L src)); 11249 effect(DEF dst, USE src); 11250 11251 ins_cost(250); 11252 format %{ "MOV $dst.lo,$src\n\t" 11253 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11254 opcode(0x8B, 0x8B); 11255 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11256 ins_pipe( ialu_mem_long_reg ); 11257 %} 11258 11259 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11260 predicate(UseSSE<=1); 11261 match(Set dst (MoveD2L src)); 11262 effect(DEF dst, USE src); 11263 11264 ins_cost(125); 11265 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11266 
ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11267 ins_pipe( fpu_mem_reg ); 11268 %} 11269 11270 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11271 predicate(UseSSE>=2); 11272 match(Set dst (MoveD2L src)); 11273 effect(DEF dst, USE src); 11274 ins_cost(95); 11275 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11276 ins_encode %{ 11277 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11278 %} 11279 ins_pipe( pipe_slow ); 11280 %} 11281 11282 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11283 predicate(UseSSE>=2); 11284 match(Set dst (MoveD2L src)); 11285 effect(DEF dst, USE src, TEMP tmp); 11286 ins_cost(85); 11287 format %{ "MOVD $dst.lo,$src\n\t" 11288 "PSHUFLW $tmp,$src,0x4E\n\t" 11289 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11290 ins_encode %{ 11291 __ movdl($dst$$Register, $src$$XMMRegister); 11292 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11293 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11294 %} 11295 ins_pipe( pipe_slow ); 11296 %} 11297 11298 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11299 match(Set dst (MoveL2D src)); 11300 effect(DEF dst, USE src); 11301 11302 ins_cost(200); 11303 format %{ "MOV $dst,$src.lo\n\t" 11304 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11305 opcode(0x89, 0x89); 11306 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11307 ins_pipe( ialu_mem_long_reg ); 11308 %} 11309 11310 11311 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11312 predicate(UseSSE<=1); 11313 match(Set dst (MoveL2D src)); 11314 effect(DEF dst, USE src); 11315 ins_cost(125); 11316 11317 format %{ "FLD_D $src\n\t" 11318 "FSTP $dst\t# MoveL2D_stack_reg" %} 11319 opcode(0xDD); /* DD /0, FLD m64real */ 11320 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11321 Pop_Reg_DPR(dst) ); 11322 ins_pipe( fpu_reg_mem ); 11323 %} 11324 11325 11326 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11327 predicate(UseSSE>=2 && 
UseXmmLoadAndClearUpper); 11328 match(Set dst (MoveL2D src)); 11329 effect(DEF dst, USE src); 11330 11331 ins_cost(95); 11332 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11333 ins_encode %{ 11334 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11335 %} 11336 ins_pipe( pipe_slow ); 11337 %} 11338 11339 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11340 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11341 match(Set dst (MoveL2D src)); 11342 effect(DEF dst, USE src); 11343 11344 ins_cost(95); 11345 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11346 ins_encode %{ 11347 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11348 %} 11349 ins_pipe( pipe_slow ); 11350 %} 11351 11352 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11353 predicate(UseSSE>=2); 11354 match(Set dst (MoveL2D src)); 11355 effect(TEMP dst, USE src, TEMP tmp); 11356 ins_cost(85); 11357 format %{ "MOVD $dst,$src.lo\n\t" 11358 "MOVD $tmp,$src.hi\n\t" 11359 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11360 ins_encode %{ 11361 __ movdl($dst$$XMMRegister, $src$$Register); 11362 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11363 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11364 %} 11365 ins_pipe( pipe_slow ); 11366 %} 11367 11368 11369 // ======================================================================= 11370 // fast clearing of an array 11371 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11372 predicate(!UseFastStosb); 11373 match(Set dummy (ClearArray cnt base)); 11374 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11375 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11376 "SHL ECX,1\t# Convert doublewords to words\n\t" 11377 "REP STOS\t# store EAX into [EDI++] while ECX--" %} 11378 ins_encode %{ 11379 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11380 %} 11381 ins_pipe( pipe_slow ); 11382 %} 11383 11384 instruct 
rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11385 predicate(UseFastStosb); 11386 match(Set dummy (ClearArray cnt base)); 11387 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11388 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11389 "SHL ECX,3\t# Convert doublewords to bytes\n\t" 11390 "REP STOSB\t# store EAX into [EDI++] while ECX--" %} 11391 ins_encode %{ 11392 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11393 %} 11394 ins_pipe( pipe_slow ); 11395 %} 11396 11397 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11398 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11399 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11400 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11401 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11402 11403 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11404 ins_encode %{ 11405 __ string_compare($str1$$Register, $str2$$Register, 11406 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11407 $tmp1$$XMMRegister, StrIntrinsicNode::LL); 11408 %} 11409 ins_pipe( pipe_slow ); 11410 %} 11411 11412 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11413 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11414 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11415 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11416 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11417 11418 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11419 ins_encode %{ 11420 __ string_compare($str1$$Register, $str2$$Register, 11421 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11422 $tmp1$$XMMRegister, StrIntrinsicNode::UU); 11423 %} 11424 ins_pipe( pipe_slow ); 11425 %} 11426 11427 instruct string_compareLU(eDIRegP 
str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11428 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11429 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11430 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11431 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11432 11433 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11434 ins_encode %{ 11435 __ string_compare($str1$$Register, $str2$$Register, 11436 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11437 $tmp1$$XMMRegister, StrIntrinsicNode::LU); 11438 %} 11439 ins_pipe( pipe_slow ); 11440 %} 11441 11442 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11443 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11444 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); 11445 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11446 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11447 11448 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11449 ins_encode %{ 11450 __ string_compare($str2$$Register, $str1$$Register, 11451 $cnt2$$Register, $cnt1$$Register, $result$$Register, 11452 $tmp1$$XMMRegister, StrIntrinsicNode::UL); 11453 %} 11454 ins_pipe( pipe_slow ); 11455 %} 11456 11457 // fast string equals 11458 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11459 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11460 match(Set result (StrEquals (Binary str1 str2) cnt)); 11461 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11462 11463 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11464 ins_encode %{ 11465 __ arrays_equals(false, $str1$$Register, $str2$$Register, 11466 $cnt$$Register, $result$$Register, $tmp3$$Register, 11467 $tmp1$$XMMRegister, 
$tmp2$$XMMRegister, false /* char */); 11468 %} 11469 11470 ins_pipe( pipe_slow ); 11471 %} 11472 11473 // fast search of substring with known size. 11474 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11475 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11476 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11477 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11478 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11479 11480 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11481 ins_encode %{ 11482 int icnt2 = (int)$int_cnt2$$constant; 11483 if (icnt2 >= 16) { 11484 // IndexOf for constant substrings with size >= 16 elements 11485 // which don't need to be loaded through stack. 11486 __ string_indexofC8($str1$$Register, $str2$$Register, 11487 $cnt1$$Register, $cnt2$$Register, 11488 icnt2, $result$$Register, 11489 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11490 } else { 11491 // Small strings are loaded through stack if they cross page boundary. 11492 __ string_indexof($str1$$Register, $str2$$Register, 11493 $cnt1$$Register, $cnt2$$Register, 11494 icnt2, $result$$Register, 11495 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11496 } 11497 %} 11498 ins_pipe( pipe_slow ); 11499 %} 11500 11501 // fast search of substring with known size. 
// IndexOf of a constant-length UTF-16 substring; the >=8-char fast path
// avoids staging the needle through the stack.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// IndexOf of a constant-length substring, mixed UTF-16 haystack / Latin1
// needle (UL) encoding.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length IndexOf variants: substring length is a runtime value, so
// (-1) is passed as the constant-count argument.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf of a single char in a UTF-16 string.
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// Test whether a byte[] contains any value with the sign bit set
// (used for Latin1/ASCII checks in String intrinsics).
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST reg,reg (shorter than CMP reg,0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// Null-check a pointer loaded from memory by TESTing it against an
// all-ones immediate (sets ZF iff the loaded value is zero).
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
// Uses the max_enc encoding (compare + branch); clobbers the flags.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
//
// Computes limit = init + stride * ((limit - init + stride - 1) / stride)
// using a 64-bit intermediate in EAX:EDX (hence the fixed eAX/eDX operands
// required by CDQ/IDIV/MUL).
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 appears to be unused below — confirm and consider removing.
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Loop-end branch for the UCF flags variant; slightly cheaper (ins_cost 200).
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Variant that must also test the parity flag explicitly: for EQ the branch is
// only taken when the result is equal AND not unordered (PF clear), so the
// parity case jumps around the equality test.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
// array for an instance of the superklass.  Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()).  Return
// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
// Fixed-register form: sub in ESI, super in EAX, result in EDI; ECX and the
// flags are clobbered by the REPNE SCASD scan.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant used when only the flags result is needed (compared against null):
// skips materializing the zero result in EDI, but still kills it.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset.  These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching.  Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short-branch variant of jmpConUCF2: two short branches (size 4), with the
// parity (unordered) case handled explicitly as in the long form.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers.  Comparing such values efficiently
// is tricky.  The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code.  The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf.  One fix is to not match both
// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in dst from a full 64-bit three-way compare:
// signed compare on the high halves, unsigned compare on the low halves.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // High halves: signed compare.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // Low halves: unsigned compare (below).
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  // Sign of the high half alone decides LT/GE against zero.
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): '&&' binds tighter than '||', so this parses as
// (UseSSE<=1 && lt) || ge — the UseSSE guard does not cover the 'ge' arm.
// Parentheses around the '||' were likely intended; confirm before changing.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): same '&&'/'||' precedence concern as cmovDDPR_reg_LTGE above.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// NOTE(review): same '&&'/'||' precedence concern as cmovDDPR_reg_LTGE above.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// NOTE(review): same '&&'/'||' precedence concern as cmovDDPR_reg_LTGE above.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): '&&' binds tighter than '||', so the UseSSE guard covers only
// the 'eq' arm; parentheses around the '||' were likely intended — confirm.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): same '&&'/'||' precedence concern as cmovDDPR_reg_EQNE above.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// NOTE(review): same '&&'/'||' precedence concern as cmovDDPR_reg_EQNE above.
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// NOTE(review): same '&&'/'||' precedence concern as cmovDDPR_reg_EQNE above.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
12719 // Just a wrapper for a normal branch, plus the predicate test 12720 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{ 12721 match(If cmp flags); 12722 effect(USE labl); 12723 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); 12724 ins_cost(300); 12725 expand %{ 12726 jmpCon(cmp,flags,labl); // JGT or JLE... 12727 %} 12728 %} 12729 12730 // Compare 2 longs and CMOVE longs. 12731 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{ 12732 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 12733 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12734 ins_cost(400); 12735 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12736 "CMOV$cmp $dst.hi,$src.hi" %} 12737 opcode(0x0F,0x40); 12738 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 12739 ins_pipe( pipe_cmov_reg_long ); 12740 %} 12741 12742 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{ 12743 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 12744 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12745 ins_cost(500); 12746 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 12747 "CMOV$cmp $dst.hi,$src.hi+4" %} 12748 opcode(0x0F,0x40); 12749 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 12750 ins_pipe( pipe_cmov_reg_long ); 12751 %} 12752 12753 // Compare 2 longs and CMOVE ints. 
12754 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{ 12755 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12756 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 12757 ins_cost(200); 12758 format %{ "CMOV$cmp $dst,$src" %} 12759 opcode(0x0F,0x40); 12760 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12761 ins_pipe( pipe_cmov_reg ); 12762 %} 12763 12764 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{ 12765 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); 12766 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 12767 ins_cost(250); 12768 format %{ "CMOV$cmp $dst,$src" %} 12769 opcode(0x0F,0x40); 12770 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 12771 ins_pipe( pipe_cmov_mem ); 12772 %} 12773 12774 // Compare 2 longs and CMOVE ptrs. 
// Conditional move of a pointer selected by a long-compare LE/GT test.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  // Only for LE/GT tests on CMOV-capable hardware.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 FPU stack registers, UseSSE<=1).
//
// NOTE: the predicates below previously read
//   UseSSE<=1 && test == le || test == gt
// which, because '&&' binds tighter than '||', let the BoolTest::gt arm
// match regardless of the UseSSE setting — inconsistent with the sibling
// cmovLL/cmovII/cmovPP rules above, which all guard BOTH tests.  The
// parentheses make the UseSSE guard cover both arms, as intended.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM registers, UseSSE>=2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 FPU stack registers, UseSSE==0).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM registers, UseSSE>=1).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  // Aligned so the call displacement can be patched atomically — TODO confirm
  // against ret_addr_offset()/compute_padding().
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
// Call Java Dynamic Instruction: loads a sentinel into EAX before the call
// (per the format string) for inline-cache dispatch — see Java_Dynamic_Call.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Also clears the x87 stack, and verifies FPU state after a leaf call.
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that touches no FP state: bare call, no FPU bookkeeping.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
// Indirect jump through a register; EBX carries the method oop to the callee.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
// The return address is popped into EDX (a throwaway) first; EAX carries
// the exception oop.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  // Zero-size: purely informs the register allocator that EAX holds the oop.
  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast-lock entry using Restricted Transactional Memory; only selected when
// the compile was configured for RTM.  The MacroAssembler::fast_lock call
// receives the RTM profiling counters and the method's MethodData.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  // box is consumed (USE_KILL); tmp/scr/cx1/cx2 are scratch.
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Fast-lock entry for the non-RTM case: same MacroAssembler helper with the
// RTM arguments passed as noreg/NULL/false.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

// Fast-unlock; whether RTM paths are emitted is decided at encode time
// from the compile's use_rtm() flag.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
// Reads the polling page; the test sets flags (hence KILL cr) and faults
// when the page is protected, trapping into the VM for the safepoint.
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // Fixed 6-byte encoding; see the Safepoint_Poll enc_class.
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active rule: a load of the value just stored to the same address is
// redundant — replace the (load store) pair with just the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.