//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:  The register allocator assumes that these registers
//                 can be used without saving upon entry to the method, &
//                 that they do not need to be saved at call sites.
//
// SOC = Save-On-Call: The register allocator assumes that these registers
//                     can be used without saving upon entry to the method,
//                     but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save: The register allocator assumes that these registers
//                    must be saved before using them upon entry to the
//                    method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes
// (i.e. the x86 3-bit register field).

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
// Each x87 register is described as an L (low) / H (high) half pair so a
// double occupies two adjacent reg_def slots.
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
// Relocation formats used when emitting 32-bit immediates/displacements.
#define RELOC_IMM32 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

// Conventional HotSpot shorthand: "__ foo()" assembles via the local _masm.
#define __ _masm.

// How to find the high register of a Long pair, given the low register
// NOTE(review): the high half is assumed to live two OptoReg slots above the
// low half — confirm against the reg_def layout in the register block above.
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Writes {lo, hi} into the 16-byte-aligned slot containing/below 'adr' and
// returns the aligned address; used to build SSE operands at VM startup.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool is one 128-bit constant: sign-clear masks for ABS, sign-flip
// masks for NEG, in float (2x32-bit lanes per jlong) and double flavors.
static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call to reset FPU/vector
// state: 6 for FLDCW when compiling in 24-bit FP mode, 3 for VZEROUPPER
// when the method uses wide (>16 byte) vectors.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size in bytes of the FFree_Float_Stack_All call sequence; stays -1 until
// that code has been emitted (see the assert below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
// Returns the number of pad bytes to insert before this node.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1; // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5; // skip MOV instruction
  current_offset += 1; // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit one ModR/M byte assembled from mod (f1), reg (f2) and r/m (f3) fields.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition code OR-ed into it.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  // Sanity check: an embedded oop immediate must be a valid, non-scavengable oop.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + [ESP+disp] addressing (ModR/M, SIB, 8- or 32-bit displacement).
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d8 (cbuf, disp); // Displacement // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d32(cbuf, disp); // Displacement // R/M byte
  }
}

// rRegI ereg, memory mem) %{ // emit_reg_mem
// Emit the ModR/M (+ optional SIB and displacement) bytes for a
// register/memory operand; base == -1 flags an absolute address.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else { // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else { // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) { // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}


// Emit a 32-bit register-to-register MOV (0x8B); elided entirely when source
// and destination encodings are the same.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Normalize EFLAGS after comiss/ucomiss so a NaN (unordered) comparison
// reads as 'less than' (only CF set) instead of ZF|PF|CF.
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  // 7 6 5 4 3 2 1 0
  // |S|Z|r|A|r|P|r|C| (r - reserved bit)
  // 0 0 1 0 1 0 1 1 (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst': -1 when unordered
// (parity) or below, otherwise 0 for equal and 1 for above.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0; // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog for -XX:+PrintAssembly/-XX:+PrintOptoAssembly;
// mirrors the code shape produced by MachPrologNode::emit below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  // verified_entry emits the stack bang (if needed), frame push and FPU
  // control-word load in one place.
  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog; mirrors MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  // POP EBP (0x58 + register encoding)
  emit_opcode(cbuf, 0x58 | EBP_enc);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Safepoint poll: TEST EAX, [polling_page] (absolute address, relocated)
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

// Upper bound (in bytes) of the code emitted above; the per-piece byte counts
// must stay in sync with MachEpilogNode::emit.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  size += 64; // added to support ReservedStackAccess
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Classify an OptoReg for spill-copy purposes: bad, general-purpose int,
// x87 float (pre-SSE2 only), XMM, or stack slot.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (cbuf != NULL), format (cbuf == NULL, !do_size) or size a
// register<->[ESP+offset] stack-slot access; returns the accumulated size.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // opcode + ModR/M + SIB, plus 1 or 4 displacement bytes when offset != 0.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
// Spill (store) or reload (load) an XMM register to/from [ESP + offset].
// A register pair (reg_lo+1 == reg_hi) is moved as a 64-bit double,
// otherwise as a 32-bit float.  Returns the accumulated encoding size;
// with AVX-512 (UseAVX > 2) the EVEX compressed-disp8 rules decide whether
// the displacement fits in a single byte.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    //                          it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    // EVEX disp8*N compression: ask the assembler whether this offset
    // encodes in one byte for a T1S operand of this element size.
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register move (MOVSD/MOVSS or MOVAPD/MOVAPS depending on
// UseXmmRegToRegMoveAll); double move when both src and dst are pairs.
// Returns the accumulated encoding size.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Move a 32-bit GP register into an XMM register (MOVD).
// Note: returns a fixed instruction size, not 'size' accumulated —
// callers return this value directly as the whole copy.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Move an XMM register's low 32 bits into a GP register (MOVD).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// GP-to-GP register move: MOV r32,r/m32 (0x8B) with a register-direct ModRM.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2; // opcode + ModRM
}

// Store an x87 FP-stack value to [ESP + offset].  If the source is not
// already ST(0) it is first FLD'ed to the top and stored with FSTP
// (store & pop); otherwise a plain FST leaves the stack unchanged.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // The "register" passed to impl_helper supplies the ModRM reg field,
  // which for 0xD9/0xDD selects FSTP (reg field 3 == EBX encoding) vs
  // FST (reg field 2 == EDX encoding) — presumably why EBX_num/EDX_num
  // are used here; confirm against the reg_def encodings above.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);

// Copy a vector value between two stack slots.  VecS/VecD use PUSH/POP
// pairs; VecX/VecY/VecZ bounce through xmm0, saving and restoring it
// below ESP.  'calc_size' is the hand-computed encoding size, verified
// against the assembler's actual output when emitting.
static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
                                     int dst_offset, uint ireg, outputStream* st) {
  int calc_size = 0;
  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
  switch (ireg) {
  case Op_VecS:
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    break;
  case Op_VecD: {
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    // The second 32-bit half sits 4 bytes further; its displacement may
    // need a different encoding size than the first half's.
    int tmp_src_offset = src_offset + 4;
    int tmp_dst_offset = dst_offset + 4;
    src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
    dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
    calc_size += 3+src_offset_size + 3+dst_offset_size;
    break;
  }
  case Op_VecX:
  case Op_VecY:
  case Op_VecZ:
    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
    break;
  default:
    ShouldNotReachHere();
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX:
      // Park xmm0 below ESP while it is used as the shuttle register.
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
    assert(size == calc_size, "incorrect size calculation");
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      // NOTE(review): debug string says "popq" though the emitted
      // instruction above is popl — preexisting cosmetic mismatch.
      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu  xmm0, [rsp + #%d]\n\t"
                "movdqu  [rsp + #%d], xmm0\n\t"
                "movdqu  xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  return calc_size;
}

// Workhorse behind MachSpillCopyNode::format/emit/size: classifies the
// source and destination (int reg, x87 FP reg, XMM reg, or stack slot)
// and dispatches to the impl_* / vec_* helpers above.  With cbuf != NULL
// it emits code; with cbuf == NULL it either pretty-prints (do_size is
// false) or only computes the encoding size.  Returns the total size in
// bytes of the emitted copy sequence.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high half first so the low-half POP does not clobber it.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Materialize the address of this node's stack slot: LEA reg,[ESP+offset],
// using disp32 (mod=2) for offsets >= 128 and disp8 (mod=1) otherwise.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// Size of the LEA above: opcode+ModRM+SIB plus a 4-byte or 1-byte disp.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: inline-cache check (expected klass in EAX,
// receiver in ECX), jump to the IC-miss stub on mismatch, then NOPs to
// pad the verified entry point to a patchable boundary.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}

// This is UltraSparc specific, true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On 86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 1; }

// No CMOVF/CMOVD with SSE/SSE2
const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Not expected to be queried on 32-bit x86 (no compressed oops here);
// ShouldNotCallThis() traps any caller.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

// Same as above for compressed klass pointers — never called on x86_32.
bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Locate the memory operand that input edge 'idx' of 'node' feeds and,
// where needed, replace it with its *_win95_safe* variant operand
// (presumably an addressing form safe for implicit null checks on
// Win95/98-era systems — confirm against the operand definitions).
// DIRECT/INDOFFSET32X and the LOAD_LONG forms need no rewrite.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Walk operands until the one whose leaf range covers 'idx' is found.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                      // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise
// here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
// Never requested on x86_32 (ShouldNotReachHere).
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
// Never requested on x86_32 (ShouldNotReachHere).
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
// MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (0x66).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // Register-direct ModRM byte (mod=3) for a two-register instruction.
  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Opcode followed by a register-direct ModRM byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32,imm32 with imm32 == 0 (the register number is folded into 0xB8+rd).
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    // normal case special case
    //
    // input : rax,: dividend min_int
    // reg: divisor -1
    //
    // output: rax,: quotient (= rax, idiv reg) min_int
    // rdx: remainder (= rax, irem reg) 0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80 cmp rax,80000000h
    // 0F 85 0B 00 00 00 jne normal_case
    // 33 D2 xor rdx,edx
    // 83 F9 FF cmp rcx,0FFh
    // 0F 84 03 00 00 00 je done
    // normal_case:
    // 99 cdq
    // F7 F9 idiv rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
    // normal_case:
    emit_opcode(cbuf,0x99); // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1725 // Check for 8-bit immediate, and set sign extend bit in opcode 1726 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1727 emit_opcode(cbuf, $primary | 0x02); } 1728 else { // If 32-bit immediate 1729 emit_opcode(cbuf, $primary); 1730 } 1731 // Emit r/m byte with secondary opcode, after primary opcode. 1732 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1733 %} 1734 1735 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1736 // Check for 8-bit immediate, and set sign extend bit in opcode 1737 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1738 $$$emit8$imm$$constant; 1739 } 1740 else { // If 32-bit immediate 1741 // Output immediate 1742 $$$emit32$imm$$constant; 1743 } 1744 %} 1745 1746 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1747 // Emit primary opcode and set sign-extend bit 1748 // Check for 8-bit immediate, and set sign extend bit in opcode 1749 int con = (int)$imm$$constant; // Throw away top bits 1750 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1751 // Emit r/m byte with secondary opcode, after primary opcode. 1752 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1753 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1754 else emit_d32(cbuf,con); 1755 %} 1756 1757 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1758 // Emit primary opcode and set sign-extend bit 1759 // Check for 8-bit immediate, and set sign extend bit in opcode 1760 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1761 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1762 // Emit r/m byte with tertiary opcode, after primary opcode. 
1763 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1764 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1765 else emit_d32(cbuf,con); 1766 %} 1767 1768 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1769 emit_cc(cbuf, $secondary, $dst$$reg ); 1770 %} 1771 1772 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1773 int destlo = $dst$$reg; 1774 int desthi = HIGH_FROM_LOW(destlo); 1775 // bswap lo 1776 emit_opcode(cbuf, 0x0F); 1777 emit_cc(cbuf, 0xC8, destlo); 1778 // bswap hi 1779 emit_opcode(cbuf, 0x0F); 1780 emit_cc(cbuf, 0xC8, desthi); 1781 // xchg lo and hi 1782 emit_opcode(cbuf, 0x87); 1783 emit_rm(cbuf, 0x3, destlo, desthi); 1784 %} 1785 1786 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1787 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1788 %} 1789 1790 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1791 $$$emit8$primary; 1792 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1793 %} 1794 1795 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1796 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1797 emit_d8(cbuf, op >> 8 ); 1798 emit_d8(cbuf, op & 255); 1799 %} 1800 1801 // emulate a CMOV with a conditional branch around a MOV 1802 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1803 // Invert sense of branch from sense of CMOV 1804 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1805 emit_d8( cbuf, $brOffs$$constant ); 1806 %} 1807 1808 enc_class enc_PartialSubtypeCheck( ) %{ 1809 Register Redi = as_Register(EDI_enc); // result register 1810 Register Reax = as_Register(EAX_enc); // super class 1811 Register Recx = as_Register(ECX_enc); // killed 1812 Register Resi = as_Register(ESI_enc); // sub class 1813 Label miss; 1814 1815 MacroAssembler _masm(&cbuf); 1816 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1817 NULL, &miss, 1818 /*set_cond_codes:*/ true); 1819 if ($primary) { 1820 __ xorptr(Redi, Redi); 1821 } 1822 __ bind(miss); 1823 %} 1824 1825 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1826 
MacroAssembler masm(&cbuf); 1827 int start = masm.offset(); 1828 if (UseSSE >= 2) { 1829 if (VerifyFPU) { 1830 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1831 } 1832 } else { 1833 // External c_calling_convention expects the FPU stack to be 'clean'. 1834 // Compiled code leaves it dirty. Do cleanup now. 1835 masm.empty_FPU_stack(); 1836 } 1837 if (sizeof_FFree_Float_Stack_All == -1) { 1838 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1839 } else { 1840 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1841 } 1842 %} 1843 1844 enc_class Verify_FPU_For_Leaf %{ 1845 if( VerifyFPU ) { 1846 MacroAssembler masm(&cbuf); 1847 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1848 } 1849 %} 1850 1851 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1852 // This is the instruction starting address for relocation info. 1853 cbuf.set_insts_mark(); 1854 $$$emit8$primary; 1855 // CALL directly to the runtime 1856 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1857 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1858 1859 if (UseSSE >= 2) { 1860 MacroAssembler _masm(&cbuf); 1861 BasicType rt = tf()->return_type(); 1862 1863 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1864 // A C runtime call where the return value is unused. In SSE2+ 1865 // mode the result needs to be removed from the FPU stack. It's 1866 // likely that this function call could be removed by the 1867 // optimizer if the C function is a pure function. 
1868 __ ffree(0); 1869 } else if (rt == T_FLOAT) { 1870 __ lea(rsp, Address(rsp, -4)); 1871 __ fstp_s(Address(rsp, 0)); 1872 __ movflt(xmm0, Address(rsp, 0)); 1873 __ lea(rsp, Address(rsp, 4)); 1874 } else if (rt == T_DOUBLE) { 1875 __ lea(rsp, Address(rsp, -8)); 1876 __ fstp_d(Address(rsp, 0)); 1877 __ movdbl(xmm0, Address(rsp, 0)); 1878 __ lea(rsp, Address(rsp, 8)); 1879 } 1880 } 1881 %} 1882 1883 1884 enc_class pre_call_resets %{ 1885 // If method sets FPU control word restore it here 1886 debug_only(int off0 = cbuf.insts_size()); 1887 if (ra_->C->in_24_bit_fp_mode()) { 1888 MacroAssembler _masm(&cbuf); 1889 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1890 } 1891 if (ra_->C->max_vector_size() > 16) { 1892 // Clear upper bits of YMM registers when current compiled code uses 1893 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1894 MacroAssembler _masm(&cbuf); 1895 __ vzeroupper(); 1896 } 1897 debug_only(int off1 = cbuf.insts_size()); 1898 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1899 %} 1900 1901 enc_class post_call_FPU %{ 1902 // If method sets FPU control word do it here also 1903 if (Compile::current()->in_24_bit_fp_mode()) { 1904 MacroAssembler masm(&cbuf); 1905 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1906 } 1907 %} 1908 1909 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1910 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1911 // who we intended to call. 1912 cbuf.set_insts_mark(); 1913 $$$emit8$primary; 1914 1915 if (!_method) { 1916 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1917 runtime_call_Relocation::spec(), 1918 RELOC_IMM32); 1919 } else { 1920 int method_index = resolved_method_index(cbuf); 1921 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1922 : static_call_Relocation::spec(method_index); 1923 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1924 rspec, RELOC_DISP32); 1925 // Emit stubs for static call. 1926 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1927 if (stub == NULL) { 1928 ciEnv::current()->record_failure("CodeCache is full"); 1929 return; 1930 } 1931 } 1932 %} 1933 1934 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1935 MacroAssembler _masm(&cbuf); 1936 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1937 %} 1938 1939 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1940 int disp = in_bytes(Method::from_compiled_offset()); 1941 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1942 1943 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1944 cbuf.set_insts_mark(); 1945 $$$emit8$primary; 1946 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1947 emit_d8(cbuf, disp); // Displacement 1948 1949 %} 1950 1951 // Following encoding is no longer used, but may be restored if calling 1952 // convention changes significantly. 
  // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  // enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //   // int ic_reg     = Matcher::inline_cache_reg();
  //   // int ic_encode  = Matcher::_regEncode[ic_reg];
  //   // int imo_reg    = Matcher::interpreter_method_oop_reg();
  //   // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //   // // Interpreter expects method_oop in EBX, currently a callee-saved register,
  //   // // so we load it immediately before the call
  //   // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
  //   // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
  //
  //   // xor rbp,ebp
  //   emit_opcode(cbuf, 0x33);
  //   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //   // CALL to interpreter.
  //   cbuf.set_insts_mark();
  //   $$$emit8$primary;
  //   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //               runtime_call_Relocation::spec(), RELOC_IMM32 );
  // %}

  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load low 32 bits of a long immediate; a zero half is emitted as XOR.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load high 32 bits of a long immediate into the high half of the pair.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // ModRM pairing a 32-bit dst with the high half of a long src
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize ZF into a boolean register: res = (ZF set) ? 1 : 0.
  // MOV does not touch flags, so the MOV res,0 / JNE / MOV res,1 trick works.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Double-precision shift by 1..31: $tertiary 0xA4 is SHLD (left shift,
  // presumably 0xAC/SHRD otherwise — it selects which half is r1 vs r2),
  // followed by a plain shift ($primary/$secondary) of r1.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by 32..63: move hi into lo, shift lo by
  // (cnt-32) if needed, then SAR hi by 31 to keep the sign fill.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical shift of a long by 32..63: one half is moved across, shifted by
  // (cnt-32) if needed, and the vacated half is cleared with XOR.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    // disp_for_half selects which 4-byte half of the double is addressed.
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move (JL,s +2)
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move (JG,s +2)
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branchless p = (p < q) ? p+y : p using SBB to produce an all-ones/zero mask.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!!  equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Multiply by a subnormal bias constant to keep strictfp results in range.
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Inverse bias multiply; pairs with strictfp_bias1.
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    // FPR1 can be stored directly (FST); anything else is first FLDed to
    // TOS and then stored-and-popped (FSTP).
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to
  // a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Push two XMM doubles onto the x87 stack via a stack temporary.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Push two XMM floats onto the x87 stack via a stack temporary.
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop x87 TOS into an XMM double and release the stack temporary.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Pop x87 TOS into an XMM float; d8 is the stack adjustment to release.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Copy an XMM double onto the x87 stack through the existing stack temp.
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop  (rel32 = -12, back to the fprem)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

//  fnstsw_ax();
//  sahf();
//  movl(dst, nan_result);
//  jcc(Assembler::parity, exit);
//  movl(dst, less_result);
//  jcc(Assembler::below, exit);
//  movl(dst, equal_result);
//  jcc(Assembler::equal, exit);
//  movl(dst, greater_result);

//  less_result     =  1;
//  greater_result  = -1;
//  equal_result    = 0;
//  nan_result      = -1;

  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
2719 emit_d8 ( cbuf, 0x0C ); 2720 // movl(dst, equal_result); 2721 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2722 emit_d32( cbuf, 0 ); 2723 // jcc(Assembler::equal, exit); 2724 emit_opcode( cbuf, 0x74 ); 2725 emit_d8 ( cbuf, 0x05 ); 2726 // movl(dst, greater_result); 2727 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2728 emit_d32( cbuf, 1 ); 2729 %} 2730 2731 2732 // Compare the longs and set flags 2733 // BROKEN! Do Not use as-is 2734 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2735 // CMP $src1.hi,$src2.hi 2736 emit_opcode( cbuf, 0x3B ); 2737 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2738 // JNE,s done 2739 emit_opcode(cbuf,0x75); 2740 emit_d8(cbuf, 2 ); 2741 // CMP $src1.lo,$src2.lo 2742 emit_opcode( cbuf, 0x3B ); 2743 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2744 // done: 2745 %} 2746 2747 enc_class convert_int_long( regL dst, rRegI src ) %{ 2748 // mov $dst.lo,$src 2749 int dst_encoding = $dst$$reg; 2750 int src_encoding = $src$$reg; 2751 encode_Copy( cbuf, dst_encoding , src_encoding ); 2752 // mov $dst.hi,$src 2753 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2754 // sar $dst.hi,31 2755 emit_opcode( cbuf, 0xC1 ); 2756 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2757 emit_d8(cbuf, 0x1F ); 2758 %} 2759 2760 enc_class convert_long_double( eRegL src ) %{ 2761 // push $src.hi 2762 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2763 // push $src.lo 2764 emit_opcode(cbuf, 0x50+$src$$reg ); 2765 // fild 64-bits at [SP] 2766 emit_opcode(cbuf,0xdf); 2767 emit_d8(cbuf, 0x6C); 2768 emit_d8(cbuf, 0x24); 2769 emit_d8(cbuf, 0x00); 2770 // pop stack 2771 emit_opcode(cbuf, 0x83); // add SP, #8 2772 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2773 emit_d8(cbuf, 0x8); 2774 %} 2775 2776 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2777 // IMUL EDX:EAX,$src1 2778 emit_opcode( cbuf, 0xF7 ); 2779 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2780 // SAR 
EDX,$cnt-32 2781 int shift_count = ((int)$cnt$$constant) - 32; 2782 if (shift_count > 0) { 2783 emit_opcode(cbuf, 0xC1); 2784 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2785 emit_d8(cbuf, shift_count); 2786 } 2787 %} 2788 2789 // this version doesn't have add sp, 8 2790 enc_class convert_long_double2( eRegL src ) %{ 2791 // push $src.hi 2792 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2793 // push $src.lo 2794 emit_opcode(cbuf, 0x50+$src$$reg ); 2795 // fild 64-bits at [SP] 2796 emit_opcode(cbuf,0xdf); 2797 emit_d8(cbuf, 0x6C); 2798 emit_d8(cbuf, 0x24); 2799 emit_d8(cbuf, 0x00); 2800 %} 2801 2802 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2803 // Basic idea: long = (long)int * (long)int 2804 // IMUL EDX:EAX, src 2805 emit_opcode( cbuf, 0xF7 ); 2806 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2807 %} 2808 2809 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2810 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2811 // MUL EDX:EAX, src 2812 emit_opcode( cbuf, 0xF7 ); 2813 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2814 %} 2815 2816 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2817 // Basic idea: lo(result) = lo(x_lo * y_lo) 2818 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2819 // MOV $tmp,$src.lo 2820 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2821 // IMUL $tmp,EDX 2822 emit_opcode( cbuf, 0x0F ); 2823 emit_opcode( cbuf, 0xAF ); 2824 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2825 // MOV EDX,$src.hi 2826 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2827 // IMUL EDX,EAX 2828 emit_opcode( cbuf, 0x0F ); 2829 emit_opcode( cbuf, 0xAF ); 2830 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2831 // ADD $tmp,EDX 2832 emit_opcode( cbuf, 0x03 ); 2833 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2834 // MUL EDX:EAX,$src.lo 2835 emit_opcode( cbuf, 0xF7 ); 2836 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2837 // ADD EDX,ESI 2838 emit_opcode( 
cbuf, 0x03 ); 2839 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2840 %} 2841 2842 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2843 // Basic idea: lo(result) = lo(src * y_lo) 2844 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2845 // IMUL $tmp,EDX,$src 2846 emit_opcode( cbuf, 0x6B ); 2847 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2848 emit_d8( cbuf, (int)$src$$constant ); 2849 // MOV EDX,$src 2850 emit_opcode(cbuf, 0xB8 + EDX_enc); 2851 emit_d32( cbuf, (int)$src$$constant ); 2852 // MUL EDX:EAX,EDX 2853 emit_opcode( cbuf, 0xF7 ); 2854 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2855 // ADD EDX,ESI 2856 emit_opcode( cbuf, 0x03 ); 2857 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2858 %} 2859 2860 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2861 // PUSH src1.hi 2862 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2863 // PUSH src1.lo 2864 emit_opcode(cbuf, 0x50+$src1$$reg ); 2865 // PUSH src2.hi 2866 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2867 // PUSH src2.lo 2868 emit_opcode(cbuf, 0x50+$src2$$reg ); 2869 // CALL directly to the runtime 2870 cbuf.set_insts_mark(); 2871 emit_opcode(cbuf,0xE8); // Call into runtime 2872 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2873 // Restore stack 2874 emit_opcode(cbuf, 0x83); // add SP, #framesize 2875 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2876 emit_d8(cbuf, 4*4); 2877 %} 2878 2879 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2880 // PUSH src1.hi 2881 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2882 // PUSH src1.lo 2883 emit_opcode(cbuf, 0x50+$src1$$reg ); 2884 // PUSH src2.hi 2885 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2886 // PUSH src2.lo 2887 emit_opcode(cbuf, 0x50+$src2$$reg ); 2888 // CALL directly to the runtime 2889 cbuf.set_insts_mark(); 2890 emit_opcode(cbuf,0xE8); // Call into runtime 2891 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2892 // Restore stack 2893 emit_opcode(cbuf, 0x83); // add SP, #framesize 2894 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2895 emit_d8(cbuf, 4*4); 2896 %} 2897 2898 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2899 // MOV $tmp,$src.lo 2900 emit_opcode(cbuf, 0x8B); 2901 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2902 // OR $tmp,$src.hi 2903 emit_opcode(cbuf, 0x0B); 2904 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2905 %} 2906 2907 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2908 // CMP $src1.lo,$src2.lo 2909 emit_opcode( cbuf, 0x3B ); 2910 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2911 // JNE,s skip 2912 emit_cc(cbuf, 0x70, 0x5); 2913 emit_d8(cbuf,2); 2914 // CMP $src1.hi,$src2.hi 2915 emit_opcode( cbuf, 0x3B ); 2916 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2917 %} 2918 2919 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2920 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2921 emit_opcode( cbuf, 0x3B ); 2922 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2923 // MOV $tmp,$src1.hi 2924 emit_opcode( cbuf, 0x8B ); 2925 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2926 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2927 emit_opcode( cbuf, 0x1B ); 2928 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2929 %} 2930 2931 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2932 // XOR $tmp,$tmp 2933 emit_opcode(cbuf,0x33); // XOR 2934 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2935 // CMP $tmp,$src.lo 2936 emit_opcode( cbuf, 0x3B ); 2937 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2938 // SBB $tmp,$src.hi 2939 emit_opcode( cbuf, 0x1B ); 2940 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2941 %} 2942 2943 // Sniff, sniff... 
smells like Gnu Superoptimizer 2944 enc_class neg_long( eRegL dst ) %{ 2945 emit_opcode(cbuf,0xF7); // NEG hi 2946 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2947 emit_opcode(cbuf,0xF7); // NEG lo 2948 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2949 emit_opcode(cbuf,0x83); // SBB hi,0 2950 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2951 emit_d8 (cbuf,0 ); 2952 %} 2953 2954 enc_class enc_pop_rdx() %{ 2955 emit_opcode(cbuf,0x5A); 2956 %} 2957 2958 enc_class enc_rethrow() %{ 2959 cbuf.set_insts_mark(); 2960 emit_opcode(cbuf, 0xE9); // jmp entry 2961 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2962 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2963 %} 2964 2965 2966 // Convert a double to an int. Java semantics require we do complex 2967 // manglelations in the corner cases. So we set the rounding mode to 2968 // 'zero', store the darned double down as an int, and reset the 2969 // rounding mode to 'nearest'. The hardware throws an exception which 2970 // patches up the correct value directly to the stack. 2971 enc_class DPR2I_encoding( regDPR src ) %{ 2972 // Flip to round-to-zero mode. We attempted to allow invalid-op 2973 // exceptions here, so that a NAN or other corner-case value will 2974 // thrown an exception (but normal values get converted at full speed). 2975 // However, I2C adapters and other float-stack manglers leave pending 2976 // invalid-op exceptions hanging. We would have to clear them before 2977 // enabling them and that is more expensive than just testing for the 2978 // invalid value Intel stores down in the corner cases. 2979 emit_opcode(cbuf,0xD9); // FLDCW trunc 2980 emit_opcode(cbuf,0x2D); 2981 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 2982 // Allocate a word 2983 emit_opcode(cbuf,0x83); // SUB ESP,4 2984 emit_opcode(cbuf,0xEC); 2985 emit_d8(cbuf,0x04); 2986 // Encoding assumes a double has been pushed into FPR0. 
2987 // Store down the double as an int, popping the FPU stack 2988 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2989 emit_opcode(cbuf,0x1C); 2990 emit_d8(cbuf,0x24); 2991 // Restore the rounding mode; mask the exception 2992 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2993 emit_opcode(cbuf,0x2D); 2994 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2995 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 2996 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 2997 2998 // Load the converted int; adjust CPU stack 2999 emit_opcode(cbuf,0x58); // POP EAX 3000 emit_opcode(cbuf,0x3D); // CMP EAX,imm 3001 emit_d32 (cbuf,0x80000000); // 0x80000000 3002 emit_opcode(cbuf,0x75); // JNE around_slow_call 3003 emit_d8 (cbuf,0x07); // Size of slow_call 3004 // Push src onto stack slow-path 3005 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3006 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3007 // CALL directly to the runtime 3008 cbuf.set_insts_mark(); 3009 emit_opcode(cbuf,0xE8); // Call into runtime 3010 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3011 // Carry on here... 3012 %} 3013 3014 enc_class DPR2L_encoding( regDPR src ) %{ 3015 emit_opcode(cbuf,0xD9); // FLDCW trunc 3016 emit_opcode(cbuf,0x2D); 3017 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3018 // Allocate a word 3019 emit_opcode(cbuf,0x83); // SUB ESP,8 3020 emit_opcode(cbuf,0xEC); 3021 emit_d8(cbuf,0x08); 3022 // Encoding assumes a double has been pushed into FPR0. 3023 // Store down the double as a long, popping the FPU stack 3024 emit_opcode(cbuf,0xDF); // FISTP [ESP] 3025 emit_opcode(cbuf,0x3C); 3026 emit_d8(cbuf,0x24); 3027 // Restore the rounding mode; mask the exception 3028 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3029 emit_opcode(cbuf,0x2D); 3030 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3031 ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 3032 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3033 3034 // Load the converted int; adjust CPU stack 3035 emit_opcode(cbuf,0x58); // POP EAX 3036 emit_opcode(cbuf,0x5A); // POP EDX 3037 emit_opcode(cbuf,0x81); // CMP EDX,imm 3038 emit_d8 (cbuf,0xFA); // rdx 3039 emit_d32 (cbuf,0x80000000); // 0x80000000 3040 emit_opcode(cbuf,0x75); // JNE around_slow_call 3041 emit_d8 (cbuf,0x07+4); // Size of slow_call 3042 emit_opcode(cbuf,0x85); // TEST EAX,EAX 3043 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 3044 emit_opcode(cbuf,0x75); // JNE around_slow_call 3045 emit_d8 (cbuf,0x07); // Size of slow_call 3046 // Push src onto stack slow-path 3047 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3048 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3049 // CALL directly to the runtime 3050 cbuf.set_insts_mark(); 3051 emit_opcode(cbuf,0xE8); // Call into runtime 3052 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3053 // Carry on here... 
3054 %} 3055 3056 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 3057 // Operand was loaded from memory into fp ST (stack top) 3058 // FMUL ST,$src /* D8 C8+i */ 3059 emit_opcode(cbuf, 0xD8); 3060 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3061 %} 3062 3063 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3064 // FADDP ST,src2 /* D8 C0+i */ 3065 emit_opcode(cbuf, 0xD8); 3066 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3067 //could use FADDP src2,fpST /* DE C0+i */ 3068 %} 3069 3070 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3071 // FADDP src2,ST /* DE C0+i */ 3072 emit_opcode(cbuf, 0xDE); 3073 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3074 %} 3075 3076 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3077 // Operand has been loaded into fp ST (stack top) 3078 // FSUB ST,$src1 3079 emit_opcode(cbuf, 0xD8); 3080 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3081 3082 // FDIV 3083 emit_opcode(cbuf, 0xD8); 3084 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3085 %} 3086 3087 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3088 // Operand was loaded from memory into fp ST (stack top) 3089 // FADD ST,$src /* D8 C0+i */ 3090 emit_opcode(cbuf, 0xD8); 3091 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3092 3093 // FMUL ST,src2 /* D8 C*+i */ 3094 emit_opcode(cbuf, 0xD8); 3095 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3096 %} 3097 3098 3099 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3100 // Operand was loaded from memory into fp ST (stack top) 3101 // FADD ST,$src /* D8 C0+i */ 3102 emit_opcode(cbuf, 0xD8); 3103 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3104 3105 // FMULP src2,ST /* DE C8+i */ 3106 emit_opcode(cbuf, 0xDE); 3107 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3108 %} 3109 3110 // Atomically load the volatile long 3111 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3112 emit_opcode(cbuf,0xDF); 3113 int rm_byte_opcode = 0x05; 3114 int base = $mem$$base; 3115 int index = $mem$$index; 3116 int scale = $mem$$scale; 3117 int displace = $mem$$disp; 3118 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3119 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3120 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3121 %} 3122 3123 // Volatile Store Long. Must be atomic, so move it into 3124 // the FP TOS and then do a 64-bit FIST. Has to probe the 3125 // target address before the store (for null-ptr checks) 3126 // so the memory operand is used twice in the encoding. 3127 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3128 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3129 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3130 emit_opcode(cbuf,0xDF); 3131 int rm_byte_opcode = 0x07; 3132 int base = $mem$$base; 3133 int index = $mem$$index; 3134 int scale = $mem$$scale; 3135 int displace = $mem$$disp; 3136 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3137 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3138 %} 3139 3140 // Safepoint Poll. This polls the safepoint page, and causes an 3141 // exception if it is not readable. Unfortunately, it kills the condition code 3142 // in the process 3143 // We current use TESTL [spp],EDI 3144 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 3145 3146 enc_class Safepoint_Poll() %{ 3147 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); 3148 emit_opcode(cbuf,0x85); 3149 emit_rm (cbuf, 0x0, 0x7, 0x5); 3150 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 3151 %} 3152 %} 3153 3154 3155 //----------FRAME-------------------------------------------------------------- 3156 // Definition of frame structure and management information. 
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   | (to get allocators register number
//  G  Owned by |        |           | v    add OptoReg::stack0())
//  r   CALLER  |        |
//  o     |     +--------+      pad to even-align allocators stack-slot
//  w     V     |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^     |   in   |  5
//        |     |  args  |  4   Holes in incoming args owned by SELF
//  |     |     |        |  3
//  |     |     +--------+
//  V     |     | old out|      Empty on Intel, window on Sparc
//        |  old |preserve|     Must be even aligned.
//        |  old SP-+--------+----> Matcher::_old_SP, even aligned
//        |     |   in   |  3   area for Intel ret address
//     Owned by |preserve|      Empty on Sparc.
//       SELF   +--------+
//        |     |  pad2  |  2   pad to align old SP
//        |     +--------+  1
//        |     | locks  |  0
//        |     +--------+----> OptoReg::stack0(), even aligned
//        |     |  pad1  | 11   pad to align new SP
//        |     +--------+
//        |     |        | 10
//        |     | spills |  9   spills
//        V     |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^     |  out   |  7
//        |     |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by +--------+
//      CALLEE  | new out|  6   Empty on Intel, window on Sparc
//      CALLEE  | new |preserve|   Must be even-aligned.
//      CALLEE  | new SP-+--------+----> Matcher::_new_SP, even aligned
//              |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be nessecary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be nessecary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    // Tables are indexed by ideal register number (Op_RegI..Op_RegL).
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    // NOTE(review): unlike c_return_value, floats go to XMM0 whenever
    // UseSSE>=1 (Java convention), not only when UseSSE>=2.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Int constant that fits in a sign-extended 8-bit immediate
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Int constant that fits in a sign-extended 16-bit immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in the 1..31 range
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in the 32..63 range (long shifts that cross the word boundary)
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
// (comment previously said "zero" — stale copy-paste; predicate is -1L)
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate:  low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value representable as a sign-extended 32-bit int
// (comment previously said "low 32-bit mask" — stale; see the predicate)
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 path, UseSSE<=1)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 path)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 path, UseSSE == 0)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE path)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand
eCXRegI(xRegI reg) %{ 3733 constraint(ALLOC_IN_RC(ecx_reg)); 3734 match(reg); 3735 match(rRegI); 3736 3737 format %{ "ECX" %} 3738 interface(REG_INTER); 3739 %} 3740 3741 operand eDXRegI(xRegI reg) %{ 3742 constraint(ALLOC_IN_RC(edx_reg)); 3743 match(reg); 3744 match(rRegI); 3745 3746 format %{ "EDX" %} 3747 interface(REG_INTER); 3748 %} 3749 3750 operand eDIRegI(xRegI reg) %{ 3751 constraint(ALLOC_IN_RC(edi_reg)); 3752 match(reg); 3753 match(rRegI); 3754 3755 format %{ "EDI" %} 3756 interface(REG_INTER); 3757 %} 3758 3759 operand naxRegI() %{ 3760 constraint(ALLOC_IN_RC(nax_reg)); 3761 match(RegI); 3762 match(eCXRegI); 3763 match(eDXRegI); 3764 match(eSIRegI); 3765 match(eDIRegI); 3766 3767 format %{ %} 3768 interface(REG_INTER); 3769 %} 3770 3771 operand nadxRegI() %{ 3772 constraint(ALLOC_IN_RC(nadx_reg)); 3773 match(RegI); 3774 match(eBXRegI); 3775 match(eCXRegI); 3776 match(eSIRegI); 3777 match(eDIRegI); 3778 3779 format %{ %} 3780 interface(REG_INTER); 3781 %} 3782 3783 operand ncxRegI() %{ 3784 constraint(ALLOC_IN_RC(ncx_reg)); 3785 match(RegI); 3786 match(eAXRegI); 3787 match(eDXRegI); 3788 match(eSIRegI); 3789 match(eDIRegI); 3790 3791 format %{ %} 3792 interface(REG_INTER); 3793 %} 3794 3795 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3796 // // 3797 operand eSIRegI(xRegI reg) %{ 3798 constraint(ALLOC_IN_RC(esi_reg)); 3799 match(reg); 3800 match(rRegI); 3801 3802 format %{ "ESI" %} 3803 interface(REG_INTER); 3804 %} 3805 3806 // Pointer Register 3807 operand anyRegP() %{ 3808 constraint(ALLOC_IN_RC(any_reg)); 3809 match(RegP); 3810 match(eAXRegP); 3811 match(eBXRegP); 3812 match(eCXRegP); 3813 match(eDIRegP); 3814 match(eRegP); 3815 3816 format %{ %} 3817 interface(REG_INTER); 3818 %} 3819 3820 operand eRegP() %{ 3821 constraint(ALLOC_IN_RC(int_reg)); 3822 match(RegP); 3823 match(eAXRegP); 3824 match(eBXRegP); 3825 match(eCXRegP); 3826 match(eDIRegP); 3827 3828 format %{ %} 3829 interface(REG_INTER); 3830 %} 3831 3832 // 
On windows95, EBP is not safe to use for implicit null tests. 3833 operand eRegP_no_EBP() %{ 3834 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3835 match(RegP); 3836 match(eAXRegP); 3837 match(eBXRegP); 3838 match(eCXRegP); 3839 match(eDIRegP); 3840 3841 op_cost(100); 3842 format %{ %} 3843 interface(REG_INTER); 3844 %} 3845 3846 operand naxRegP() %{ 3847 constraint(ALLOC_IN_RC(nax_reg)); 3848 match(RegP); 3849 match(eBXRegP); 3850 match(eDXRegP); 3851 match(eCXRegP); 3852 match(eSIRegP); 3853 match(eDIRegP); 3854 3855 format %{ %} 3856 interface(REG_INTER); 3857 %} 3858 3859 operand nabxRegP() %{ 3860 constraint(ALLOC_IN_RC(nabx_reg)); 3861 match(RegP); 3862 match(eCXRegP); 3863 match(eDXRegP); 3864 match(eSIRegP); 3865 match(eDIRegP); 3866 3867 format %{ %} 3868 interface(REG_INTER); 3869 %} 3870 3871 operand pRegP() %{ 3872 constraint(ALLOC_IN_RC(p_reg)); 3873 match(RegP); 3874 match(eBXRegP); 3875 match(eDXRegP); 3876 match(eSIRegP); 3877 match(eDIRegP); 3878 3879 format %{ %} 3880 interface(REG_INTER); 3881 %} 3882 3883 // Special Registers 3884 // Return a pointer value 3885 operand eAXRegP(eRegP reg) %{ 3886 constraint(ALLOC_IN_RC(eax_reg)); 3887 match(reg); 3888 format %{ "EAX" %} 3889 interface(REG_INTER); 3890 %} 3891 3892 // Used in AtomicAdd 3893 operand eBXRegP(eRegP reg) %{ 3894 constraint(ALLOC_IN_RC(ebx_reg)); 3895 match(reg); 3896 format %{ "EBX" %} 3897 interface(REG_INTER); 3898 %} 3899 3900 // Tail-call (interprocedural jump) to interpreter 3901 operand eCXRegP(eRegP reg) %{ 3902 constraint(ALLOC_IN_RC(ecx_reg)); 3903 match(reg); 3904 format %{ "ECX" %} 3905 interface(REG_INTER); 3906 %} 3907 3908 operand eSIRegP(eRegP reg) %{ 3909 constraint(ALLOC_IN_RC(esi_reg)); 3910 match(reg); 3911 format %{ "ESI" %} 3912 interface(REG_INTER); 3913 %} 3914 3915 // Used in rep stosw 3916 operand eDIRegP(eRegP reg) %{ 3917 constraint(ALLOC_IN_RC(edi_reg)); 3918 match(reg); 3919 format %{ "EDI" %} 3920 interface(REG_INTER); 3921 %} 3922 3923 operand eRegL() %{ 
3924 constraint(ALLOC_IN_RC(long_reg)); 3925 match(RegL); 3926 match(eADXRegL); 3927 3928 format %{ %} 3929 interface(REG_INTER); 3930 %} 3931 3932 operand eADXRegL( eRegL reg ) %{ 3933 constraint(ALLOC_IN_RC(eadx_reg)); 3934 match(reg); 3935 3936 format %{ "EDX:EAX" %} 3937 interface(REG_INTER); 3938 %} 3939 3940 operand eBCXRegL( eRegL reg ) %{ 3941 constraint(ALLOC_IN_RC(ebcx_reg)); 3942 match(reg); 3943 3944 format %{ "EBX:ECX" %} 3945 interface(REG_INTER); 3946 %} 3947 3948 // Special case for integer high multiply 3949 operand eADXRegL_low_only() %{ 3950 constraint(ALLOC_IN_RC(eadx_reg)); 3951 match(RegL); 3952 3953 format %{ "EAX" %} 3954 interface(REG_INTER); 3955 %} 3956 3957 // Flags register, used as output of compare instructions 3958 operand eFlagsReg() %{ 3959 constraint(ALLOC_IN_RC(int_flags)); 3960 match(RegFlags); 3961 3962 format %{ "EFLAGS" %} 3963 interface(REG_INTER); 3964 %} 3965 3966 // Flags register, used as output of FLOATING POINT compare instructions 3967 operand eFlagsRegU() %{ 3968 constraint(ALLOC_IN_RC(int_flags)); 3969 match(RegFlags); 3970 3971 format %{ "EFLAGS_U" %} 3972 interface(REG_INTER); 3973 %} 3974 3975 operand eFlagsRegUCF() %{ 3976 constraint(ALLOC_IN_RC(int_flags)); 3977 match(RegFlags); 3978 predicate(false); 3979 3980 format %{ "EFLAGS_U_CF" %} 3981 interface(REG_INTER); 3982 %} 3983 3984 // Condition Code Register used by long compare 3985 operand flagsReg_long_LTGE() %{ 3986 constraint(ALLOC_IN_RC(int_flags)); 3987 match(RegFlags); 3988 format %{ "FLAGS_LTGE" %} 3989 interface(REG_INTER); 3990 %} 3991 operand flagsReg_long_EQNE() %{ 3992 constraint(ALLOC_IN_RC(int_flags)); 3993 match(RegFlags); 3994 format %{ "FLAGS_EQNE" %} 3995 interface(REG_INTER); 3996 %} 3997 operand flagsReg_long_LEGT() %{ 3998 constraint(ALLOC_IN_RC(int_flags)); 3999 match(RegFlags); 4000 format %{ "FLAGS_LEGT" %} 4001 interface(REG_INTER); 4002 %} 4003 4004 // Float register operands 4005 operand regDPR() %{ 4006 predicate( UseSSE < 2 ); 
4007 constraint(ALLOC_IN_RC(fp_dbl_reg)); 4008 match(RegD); 4009 match(regDPR1); 4010 match(regDPR2); 4011 format %{ %} 4012 interface(REG_INTER); 4013 %} 4014 4015 operand regDPR1(regDPR reg) %{ 4016 predicate( UseSSE < 2 ); 4017 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4018 match(reg); 4019 format %{ "FPR1" %} 4020 interface(REG_INTER); 4021 %} 4022 4023 operand regDPR2(regDPR reg) %{ 4024 predicate( UseSSE < 2 ); 4025 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4026 match(reg); 4027 format %{ "FPR2" %} 4028 interface(REG_INTER); 4029 %} 4030 4031 operand regnotDPR1(regDPR reg) %{ 4032 predicate( UseSSE < 2 ); 4033 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4034 match(reg); 4035 format %{ %} 4036 interface(REG_INTER); 4037 %} 4038 4039 // Float register operands 4040 operand regFPR() %{ 4041 predicate( UseSSE < 2 ); 4042 constraint(ALLOC_IN_RC(fp_flt_reg)); 4043 match(RegF); 4044 match(regFPR1); 4045 format %{ %} 4046 interface(REG_INTER); 4047 %} 4048 4049 // Float register operands 4050 operand regFPR1(regFPR reg) %{ 4051 predicate( UseSSE < 2 ); 4052 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4053 match(reg); 4054 format %{ "FPR1" %} 4055 interface(REG_INTER); 4056 %} 4057 4058 // XMM Float register operands 4059 operand regF() %{ 4060 predicate( UseSSE>=1 ); 4061 constraint(ALLOC_IN_RC(float_reg_legacy)); 4062 match(RegF); 4063 format %{ %} 4064 interface(REG_INTER); 4065 %} 4066 4067 // XMM Double register operands 4068 operand regD() %{ 4069 predicate( UseSSE>=2 ); 4070 constraint(ALLOC_IN_RC(double_reg_legacy)); 4071 match(RegD); 4072 format %{ %} 4073 interface(REG_INTER); 4074 %} 4075 4076 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM) 4077 // runtime code generation via reg_class_dynamic. 
4078 operand vecS() %{ 4079 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 4080 match(VecS); 4081 4082 format %{ %} 4083 interface(REG_INTER); 4084 %} 4085 4086 operand vecD() %{ 4087 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 4088 match(VecD); 4089 4090 format %{ %} 4091 interface(REG_INTER); 4092 %} 4093 4094 operand vecX() %{ 4095 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 4096 match(VecX); 4097 4098 format %{ %} 4099 interface(REG_INTER); 4100 %} 4101 4102 operand vecY() %{ 4103 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 4104 match(VecY); 4105 4106 format %{ %} 4107 interface(REG_INTER); 4108 %} 4109 4110 //----------Memory Operands---------------------------------------------------- 4111 // Direct Memory Operand 4112 operand direct(immP addr) %{ 4113 match(addr); 4114 4115 format %{ "[$addr]" %} 4116 interface(MEMORY_INTER) %{ 4117 base(0xFFFFFFFF); 4118 index(0x4); 4119 scale(0x0); 4120 disp($addr); 4121 %} 4122 %} 4123 4124 // Indirect Memory Operand 4125 operand indirect(eRegP reg) %{ 4126 constraint(ALLOC_IN_RC(int_reg)); 4127 match(reg); 4128 4129 format %{ "[$reg]" %} 4130 interface(MEMORY_INTER) %{ 4131 base($reg); 4132 index(0x4); 4133 scale(0x0); 4134 disp(0x0); 4135 %} 4136 %} 4137 4138 // Indirect Memory Plus Short Offset Operand 4139 operand indOffset8(eRegP reg, immI8 off) %{ 4140 match(AddP reg off); 4141 4142 format %{ "[$reg + $off]" %} 4143 interface(MEMORY_INTER) %{ 4144 base($reg); 4145 index(0x4); 4146 scale(0x0); 4147 disp($off); 4148 %} 4149 %} 4150 4151 // Indirect Memory Plus Long Offset Operand 4152 operand indOffset32(eRegP reg, immI off) %{ 4153 match(AddP reg off); 4154 4155 format %{ "[$reg + $off]" %} 4156 interface(MEMORY_INTER) %{ 4157 base($reg); 4158 index(0x4); 4159 scale(0x0); 4160 disp($off); 4161 %} 4162 %} 4163 4164 // Indirect Memory Plus Long Offset Operand 4165 operand indOffset32X(rRegI reg, immP off) %{ 4166 match(AddP off reg); 4167 4168 format %{ "[$reg + $off]" %} 4169 interface(MEMORY_INTER) %{ 4170 
base($reg); 4171 index(0x4); 4172 scale(0x0); 4173 disp($off); 4174 %} 4175 %} 4176 4177 // Indirect Memory Plus Index Register Plus Offset Operand 4178 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4179 match(AddP (AddP reg ireg) off); 4180 4181 op_cost(10); 4182 format %{"[$reg + $off + $ireg]" %} 4183 interface(MEMORY_INTER) %{ 4184 base($reg); 4185 index($ireg); 4186 scale(0x0); 4187 disp($off); 4188 %} 4189 %} 4190 4191 // Indirect Memory Plus Index Register Plus Offset Operand 4192 operand indIndex(eRegP reg, rRegI ireg) %{ 4193 match(AddP reg ireg); 4194 4195 op_cost(10); 4196 format %{"[$reg + $ireg]" %} 4197 interface(MEMORY_INTER) %{ 4198 base($reg); 4199 index($ireg); 4200 scale(0x0); 4201 disp(0x0); 4202 %} 4203 %} 4204 4205 // // ------------------------------------------------------------------------- 4206 // // 486 architecture doesn't support "scale * index + offset" with out a base 4207 // // ------------------------------------------------------------------------- 4208 // // Scaled Memory Operands 4209 // // Indirect Memory Times Scale Plus Offset Operand 4210 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4211 // match(AddP off (LShiftI ireg scale)); 4212 // 4213 // op_cost(10); 4214 // format %{"[$off + $ireg << $scale]" %} 4215 // interface(MEMORY_INTER) %{ 4216 // base(0x4); 4217 // index($ireg); 4218 // scale($scale); 4219 // disp($off); 4220 // %} 4221 // %} 4222 4223 // Indirect Memory Times Scale Plus Index Register 4224 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4225 match(AddP reg (LShiftI ireg scale)); 4226 4227 op_cost(10); 4228 format %{"[$reg + $ireg << $scale]" %} 4229 interface(MEMORY_INTER) %{ 4230 base($reg); 4231 index($ireg); 4232 scale($scale); 4233 disp(0x0); 4234 %} 4235 %} 4236 4237 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4238 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4239 match(AddP (AddP reg (LShiftI ireg 
scale)) off); 4240 4241 op_cost(10); 4242 format %{"[$reg + $off + $ireg << $scale]" %} 4243 interface(MEMORY_INTER) %{ 4244 base($reg); 4245 index($ireg); 4246 scale($scale); 4247 disp($off); 4248 %} 4249 %} 4250 4251 //----------Load Long Memory Operands------------------------------------------ 4252 // The load-long idiom will use it's address expression again after loading 4253 // the first word of the long. If the load-long destination overlaps with 4254 // registers used in the addressing expression, the 2nd half will be loaded 4255 // from a clobbered address. Fix this by requiring that load-long use 4256 // address registers that do not overlap with the load-long target. 4257 4258 // load-long support 4259 operand load_long_RegP() %{ 4260 constraint(ALLOC_IN_RC(esi_reg)); 4261 match(RegP); 4262 match(eSIRegP); 4263 op_cost(100); 4264 format %{ %} 4265 interface(REG_INTER); 4266 %} 4267 4268 // Indirect Memory Operand Long 4269 operand load_long_indirect(load_long_RegP reg) %{ 4270 constraint(ALLOC_IN_RC(esi_reg)); 4271 match(reg); 4272 4273 format %{ "[$reg]" %} 4274 interface(MEMORY_INTER) %{ 4275 base($reg); 4276 index(0x4); 4277 scale(0x0); 4278 disp(0x0); 4279 %} 4280 %} 4281 4282 // Indirect Memory Plus Long Offset Operand 4283 operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4284 match(AddP reg off); 4285 4286 format %{ "[$reg + $off]" %} 4287 interface(MEMORY_INTER) %{ 4288 base($reg); 4289 index(0x4); 4290 scale(0x0); 4291 disp($off); 4292 %} 4293 %} 4294 4295 opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4296 4297 4298 //----------Special Memory Operands-------------------------------------------- 4299 // Stack Slot Operand - This operand is used for loading and storing temporary 4300 // values on the stack where a match requires a value to 4301 // flow through memory. 
4302 operand stackSlotP(sRegP reg) %{ 4303 constraint(ALLOC_IN_RC(stack_slots)); 4304 // No match rule because this operand is only generated in matching 4305 format %{ "[$reg]" %} 4306 interface(MEMORY_INTER) %{ 4307 base(0x4); // ESP 4308 index(0x4); // No Index 4309 scale(0x0); // No Scale 4310 disp($reg); // Stack Offset 4311 %} 4312 %} 4313 4314 operand stackSlotI(sRegI reg) %{ 4315 constraint(ALLOC_IN_RC(stack_slots)); 4316 // No match rule because this operand is only generated in matching 4317 format %{ "[$reg]" %} 4318 interface(MEMORY_INTER) %{ 4319 base(0x4); // ESP 4320 index(0x4); // No Index 4321 scale(0x0); // No Scale 4322 disp($reg); // Stack Offset 4323 %} 4324 %} 4325 4326 operand stackSlotF(sRegF reg) %{ 4327 constraint(ALLOC_IN_RC(stack_slots)); 4328 // No match rule because this operand is only generated in matching 4329 format %{ "[$reg]" %} 4330 interface(MEMORY_INTER) %{ 4331 base(0x4); // ESP 4332 index(0x4); // No Index 4333 scale(0x0); // No Scale 4334 disp($reg); // Stack Offset 4335 %} 4336 %} 4337 4338 operand stackSlotD(sRegD reg) %{ 4339 constraint(ALLOC_IN_RC(stack_slots)); 4340 // No match rule because this operand is only generated in matching 4341 format %{ "[$reg]" %} 4342 interface(MEMORY_INTER) %{ 4343 base(0x4); // ESP 4344 index(0x4); // No Index 4345 scale(0x0); // No Scale 4346 disp($reg); // Stack Offset 4347 %} 4348 %} 4349 4350 operand stackSlotL(sRegL reg) %{ 4351 constraint(ALLOC_IN_RC(stack_slots)); 4352 // No match rule because this operand is only generated in matching 4353 format %{ "[$reg]" %} 4354 interface(MEMORY_INTER) %{ 4355 base(0x4); // ESP 4356 index(0x4); // No Index 4357 scale(0x0); // No Scale 4358 disp($reg); // Stack Offset 4359 %} 4360 %} 4361 4362 //----------Memory Operands - Win95 Implicit Null Variants---------------- 4363 // Indirect Memory Operand 4364 operand indirect_win95_safe(eRegP_no_EBP reg) 4365 %{ 4366 constraint(ALLOC_IN_RC(int_reg)); 4367 match(reg); 4368 4369 op_cost(100); 4370 
format %{ "[$reg]" %} 4371 interface(MEMORY_INTER) %{ 4372 base($reg); 4373 index(0x4); 4374 scale(0x0); 4375 disp(0x0); 4376 %} 4377 %} 4378 4379 // Indirect Memory Plus Short Offset Operand 4380 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off) 4381 %{ 4382 match(AddP reg off); 4383 4384 op_cost(100); 4385 format %{ "[$reg + $off]" %} 4386 interface(MEMORY_INTER) %{ 4387 base($reg); 4388 index(0x4); 4389 scale(0x0); 4390 disp($off); 4391 %} 4392 %} 4393 4394 // Indirect Memory Plus Long Offset Operand 4395 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off) 4396 %{ 4397 match(AddP reg off); 4398 4399 op_cost(100); 4400 format %{ "[$reg + $off]" %} 4401 interface(MEMORY_INTER) %{ 4402 base($reg); 4403 index(0x4); 4404 scale(0x0); 4405 disp($off); 4406 %} 4407 %} 4408 4409 // Indirect Memory Plus Index Register Plus Offset Operand 4410 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off) 4411 %{ 4412 match(AddP (AddP reg ireg) off); 4413 4414 op_cost(100); 4415 format %{"[$reg + $off + $ireg]" %} 4416 interface(MEMORY_INTER) %{ 4417 base($reg); 4418 index($ireg); 4419 scale(0x0); 4420 disp($off); 4421 %} 4422 %} 4423 4424 // Indirect Memory Times Scale Plus Index Register 4425 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale) 4426 %{ 4427 match(AddP reg (LShiftI ireg scale)); 4428 4429 op_cost(100); 4430 format %{"[$reg + $ireg << $scale]" %} 4431 interface(MEMORY_INTER) %{ 4432 base($reg); 4433 index($ireg); 4434 scale($scale); 4435 disp(0x0); 4436 %} 4437 %} 4438 4439 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4440 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale) 4441 %{ 4442 match(AddP (AddP reg (LShiftI ireg scale)) off); 4443 4444 op_cost(100); 4445 format %{"[$reg + $off + $ireg << $scale]" %} 4446 interface(MEMORY_INTER) %{ 4447 base($reg); 4448 index($ireg); 4449 scale($scale); 4450 disp($off); 4451 %} 4452 %} 4453 4454 
//----------Conditional Branch Operands---------------------------------------- 4455 // Comparison Op - This is the operation of the comparison, and is limited to 4456 // the following set of codes: 4457 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) 4458 // 4459 // Other attributes of the comparison, such as unsignedness, are specified 4460 // by the comparison instruction that sets a condition code flags register. 4461 // That result is represented by a flags operand whose subtype is appropriate 4462 // to the unsignedness (etc.) of the comparison. 4463 // 4464 // Later, the instruction which matches both the Comparison Op (a Bool) and 4465 // the flags (produced by the Cmp) specifies the coding of the comparison op 4466 // by matching a specific subtype of Bool operand below, such as cmpOpU. 4467 4468 // Comparision Code 4469 operand cmpOp() %{ 4470 match(Bool); 4471 4472 format %{ "" %} 4473 interface(COND_INTER) %{ 4474 equal(0x4, "e"); 4475 not_equal(0x5, "ne"); 4476 less(0xC, "l"); 4477 greater_equal(0xD, "ge"); 4478 less_equal(0xE, "le"); 4479 greater(0xF, "g"); 4480 overflow(0x0, "o"); 4481 no_overflow(0x1, "no"); 4482 %} 4483 %} 4484 4485 // Comparison Code, unsigned compare. Used by FP also, with 4486 // C2 (unordered) turned into GT or LT already. The other bits 4487 // C0 and C3 are turned into Carry & Zero flags. 
4488 operand cmpOpU() %{ 4489 match(Bool); 4490 4491 format %{ "" %} 4492 interface(COND_INTER) %{ 4493 equal(0x4, "e"); 4494 not_equal(0x5, "ne"); 4495 less(0x2, "b"); 4496 greater_equal(0x3, "nb"); 4497 less_equal(0x6, "be"); 4498 greater(0x7, "nbe"); 4499 overflow(0x0, "o"); 4500 no_overflow(0x1, "no"); 4501 %} 4502 %} 4503 4504 // Floating comparisons that don't require any fixup for the unordered case 4505 operand cmpOpUCF() %{ 4506 match(Bool); 4507 predicate(n->as_Bool()->_test._test == BoolTest::lt || 4508 n->as_Bool()->_test._test == BoolTest::ge || 4509 n->as_Bool()->_test._test == BoolTest::le || 4510 n->as_Bool()->_test._test == BoolTest::gt); 4511 format %{ "" %} 4512 interface(COND_INTER) %{ 4513 equal(0x4, "e"); 4514 not_equal(0x5, "ne"); 4515 less(0x2, "b"); 4516 greater_equal(0x3, "nb"); 4517 less_equal(0x6, "be"); 4518 greater(0x7, "nbe"); 4519 overflow(0x0, "o"); 4520 no_overflow(0x1, "no"); 4521 %} 4522 %} 4523 4524 4525 // Floating comparisons that can be fixed up with extra conditional jumps 4526 operand cmpOpUCF2() %{ 4527 match(Bool); 4528 predicate(n->as_Bool()->_test._test == BoolTest::ne || 4529 n->as_Bool()->_test._test == BoolTest::eq); 4530 format %{ "" %} 4531 interface(COND_INTER) %{ 4532 equal(0x4, "e"); 4533 not_equal(0x5, "ne"); 4534 less(0x2, "b"); 4535 greater_equal(0x3, "nb"); 4536 less_equal(0x6, "be"); 4537 greater(0x7, "nbe"); 4538 overflow(0x0, "o"); 4539 no_overflow(0x1, "no"); 4540 %} 4541 %} 4542 4543 // Comparison Code for FP conditional move 4544 operand cmpOp_fcmov() %{ 4545 match(Bool); 4546 4547 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 4548 n->as_Bool()->_test._test != BoolTest::no_overflow); 4549 format %{ "" %} 4550 interface(COND_INTER) %{ 4551 equal (0x0C8); 4552 not_equal (0x1C8); 4553 less (0x0C0); 4554 greater_equal(0x1C0); 4555 less_equal (0x0D0); 4556 greater (0x1D0); 4557 overflow(0x0, "o"); // not really supported by the instruction 4558 no_overflow(0x1, "no"); // not really supported 
by the instruction 4559 %} 4560 %} 4561 4562 // Comparision Code used in long compares 4563 operand cmpOp_commute() %{ 4564 match(Bool); 4565 4566 format %{ "" %} 4567 interface(COND_INTER) %{ 4568 equal(0x4, "e"); 4569 not_equal(0x5, "ne"); 4570 less(0xF, "g"); 4571 greater_equal(0xE, "le"); 4572 less_equal(0xD, "ge"); 4573 greater(0xC, "l"); 4574 overflow(0x0, "o"); 4575 no_overflow(0x1, "no"); 4576 %} 4577 %} 4578 4579 //----------OPERAND CLASSES---------------------------------------------------- 4580 // Operand Classes are groups of operands that are used as to simplify 4581 // instruction definitions by not requiring the AD writer to specify separate 4582 // instructions for every form of operand when the instruction accepts 4583 // multiple operand types with the same basic encoding and format. The classic 4584 // case of this is memory operands. 4585 4586 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, 4587 indIndex, indIndexScale, indIndexScaleOffset); 4588 4589 // Long memory operations are encoded in 2 instructions and a +4 offset. 4590 // This means some kind of offset is always required and you cannot use 4591 // an oop as the offset (done when working on static globals). 4592 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, 4593 indIndex, indIndexScale, indIndexScaleOffset); 4594 4595 4596 //----------PIPELINE----------------------------------------------------------- 4597 // Rules which define the behavior of the target architectures pipeline. 
4598 pipeline %{ 4599 4600 //----------ATTRIBUTES--------------------------------------------------------- 4601 attributes %{ 4602 variable_size_instructions; // Fixed size instructions 4603 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle 4604 instruction_unit_size = 1; // An instruction is 1 bytes long 4605 instruction_fetch_unit_size = 16; // The processor fetches one line 4606 instruction_fetch_units = 1; // of 16 bytes 4607 4608 // List of nop instructions 4609 nops( MachNop ); 4610 %} 4611 4612 //----------RESOURCES---------------------------------------------------------- 4613 // Resources are the functional units available to the machine 4614 4615 // Generic P2/P3 pipeline 4616 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of 4617 // 3 instructions decoded per cycle. 4618 // 2 load/store ops per cycle, 1 branch, 1 FPU, 4619 // 2 ALU op, only ALU0 handles mul/div instructions. 4620 resources( D0, D1, D2, DECODE = D0 | D1 | D2, 4621 MS0, MS1, MEM = MS0 | MS1, 4622 BR, FPU, 4623 ALU0, ALU1, ALU = ALU0 | ALU1 ); 4624 4625 //----------PIPELINE DESCRIPTION----------------------------------------------- 4626 // Pipeline Description specifies the stages in the machine's pipeline 4627 4628 // Generic P2/P3 pipeline 4629 pipe_desc(S0, S1, S2, S3, S4, S5); 4630 4631 //----------PIPELINE CLASSES--------------------------------------------------- 4632 // Pipeline Classes describe the stages in which input and output are 4633 // referenced by the hardware pipeline. 4634 4635 // Naming convention: ialu or fpu 4636 // Then: _reg 4637 // Then: _reg if there is a 2nd register 4638 // Then: _long if it's a pair of instructions implementing a long 4639 // Then: _fat if it requires the big decoder 4640 // Or: _mem if it requires the big decoder and a memory unit. 
4641 4642 // Integer ALU reg operation 4643 pipe_class ialu_reg(rRegI dst) %{ 4644 single_instruction; 4645 dst : S4(write); 4646 dst : S3(read); 4647 DECODE : S0; // any decoder 4648 ALU : S3; // any alu 4649 %} 4650 4651 // Long ALU reg operation 4652 pipe_class ialu_reg_long(eRegL dst) %{ 4653 instruction_count(2); 4654 dst : S4(write); 4655 dst : S3(read); 4656 DECODE : S0(2); // any 2 decoders 4657 ALU : S3(2); // both alus 4658 %} 4659 4660 // Integer ALU reg operation using big decoder 4661 pipe_class ialu_reg_fat(rRegI dst) %{ 4662 single_instruction; 4663 dst : S4(write); 4664 dst : S3(read); 4665 D0 : S0; // big decoder only 4666 ALU : S3; // any alu 4667 %} 4668 4669 // Long ALU reg operation using big decoder 4670 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4671 instruction_count(2); 4672 dst : S4(write); 4673 dst : S3(read); 4674 D0 : S0(2); // big decoder only; twice 4675 ALU : S3(2); // any 2 alus 4676 %} 4677 4678 // Integer ALU reg-reg operation 4679 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4680 single_instruction; 4681 dst : S4(write); 4682 src : S3(read); 4683 DECODE : S0; // any decoder 4684 ALU : S3; // any alu 4685 %} 4686 4687 // Long ALU reg-reg operation 4688 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4689 instruction_count(2); 4690 dst : S4(write); 4691 src : S3(read); 4692 DECODE : S0(2); // any 2 decoders 4693 ALU : S3(2); // both alus 4694 %} 4695 4696 // Integer ALU reg-reg operation 4697 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4698 single_instruction; 4699 dst : S4(write); 4700 src : S3(read); 4701 D0 : S0; // big decoder only 4702 ALU : S3; // any alu 4703 %} 4704 4705 // Long ALU reg-reg operation 4706 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4707 instruction_count(2); 4708 dst : S4(write); 4709 src : S3(read); 4710 D0 : S0(2); // big decoder only; twice 4711 ALU : S3(2); // both alus 4712 %} 4713 4714 // Integer ALU reg-mem operation 4715 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4716 single_instruction; 4717 dst : S5(write); 4718 mem : S3(read); 4719 D0 : S0; // big decoder only 4720 ALU : S4; // any alu 4721 MEM : S3; // any mem 4722 %} 4723 4724 // Long ALU reg-mem operation 4725 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4726 instruction_count(2); 4727 dst : S5(write); 4728 mem : S3(read); 4729 D0 : S0(2); // big decoder only; twice 4730 ALU : S4(2); // any 2 alus 4731 MEM : S3(2); // both mems 4732 %} 4733 4734 // Integer mem operation (prefetch) 4735 pipe_class ialu_mem(memory mem) 4736 %{ 4737 single_instruction; 4738 mem : S3(read); 4739 D0 : S0; // big decoder only 4740 MEM : S3; // any mem 4741 %} 4742 4743 // Integer Store to Memory 4744 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4745 single_instruction; 4746 mem : S3(read); 4747 src : S5(read); 4748 D0 : S0; // big decoder only 4749 ALU : S4; // any alu 4750 MEM : S3; 4751 %} 4752 4753 // Long Store to Memory 4754 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4755 instruction_count(2); 4756 mem : S3(read); 4757 src : S5(read); 4758 D0 : S0(2); // big decoder only; twice 4759 ALU : S4(2); // any 2 alus 4760 MEM : S3(2); // Both mems 4761 %} 4762 4763 // Integer Store to Memory 4764 pipe_class ialu_mem_imm(memory mem) %{ 4765 single_instruction; 4766 mem : S3(read); 4767 D0 : S0; // big decoder only 4768 ALU : S4; // any alu 4769 MEM : S3; 4770 %} 4771 4772 // Integer ALU0 reg-reg operation 4773 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4774 single_instruction; 4775 dst : S4(write); 4776 src : S3(read); 4777 D0 : S0; // Big decoder only 4778 ALU0 : S3; // only alu0 4779 %} 4780 4781 // Integer ALU0 reg-mem operation 4782 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4783 single_instruction; 4784 dst : S5(write); 4785 mem : S3(read); 4786 D0 : S0; // big decoder only 4787 ALU0 : S4; // ALU0 only 4788 MEM : S3; // any mem 4789 %} 4790 4791 // Integer ALU reg-reg operation 4792 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4793 single_instruction; 4794 cr : S4(write); 4795 src1 : S3(read); 4796 src2 : S3(read); 4797 DECODE : S0; // any decoder 4798 ALU : S3; // any alu 4799 %} 4800 4801 // Integer ALU reg-imm operation 4802 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4803 single_instruction; 4804 cr : S4(write); 4805 src1 : S3(read); 4806 DECODE : S0; // any decoder 4807 ALU : S3; // any alu 4808 %} 4809 4810 // Integer ALU reg-mem operation 4811 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4812 single_instruction; 4813 cr : S4(write); 4814 src1 : S3(read); 4815 src2 : S3(read); 4816 D0 : S0; // big decoder only 4817 ALU : S4; // any alu 4818 MEM : S3; 4819 %} 4820 4821 // Conditional move reg-reg 4822 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4823 instruction_count(4); 4824 y : S4(read); 4825 q : S3(read); 4826 p : S3(read); 4827 DECODE : S0(4); // any decoder 4828 %} 4829 4830 // Conditional move reg-reg 4831 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4832 single_instruction; 4833 dst : S4(write); 4834 src : S3(read); 4835 cr : S3(read); 4836 DECODE : S0; // any decoder 4837 %} 4838 4839 // Conditional move reg-mem 4840 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4841 single_instruction; 4842 dst : S4(write); 4843 src : S3(read); 4844 cr : S3(read); 4845 DECODE : S0; // any decoder 4846 MEM : S3; 4847 %} 4848 4849 // Conditional move reg-reg long 4850 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4851 single_instruction; 4852 dst : S4(write); 4853 src : S3(read); 4854 cr : S3(read); 4855 DECODE : S0(2); // any 2 decoders 4856 %} 4857 4858 // Conditional move double reg-reg 4859 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4860 single_instruction; 4861 dst : S4(write); 4862 src : S3(read); 4863 cr : S3(read); 4864 DECODE : S0; // any decoder 4865 %} 4866 4867 // Float reg-reg operation 4868 pipe_class fpu_reg(regDPR 
dst) %{ 4869 instruction_count(2); 4870 dst : S3(read); 4871 DECODE : S0(2); // any 2 decoders 4872 FPU : S3; 4873 %} 4874 4875 // Float reg-reg operation 4876 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4877 instruction_count(2); 4878 dst : S4(write); 4879 src : S3(read); 4880 DECODE : S0(2); // any 2 decoders 4881 FPU : S3; 4882 %} 4883 4884 // Float reg-reg operation 4885 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4886 instruction_count(3); 4887 dst : S4(write); 4888 src1 : S3(read); 4889 src2 : S3(read); 4890 DECODE : S0(3); // any 3 decoders 4891 FPU : S3(2); 4892 %} 4893 4894 // Float reg-reg operation 4895 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 4896 instruction_count(4); 4897 dst : S4(write); 4898 src1 : S3(read); 4899 src2 : S3(read); 4900 src3 : S3(read); 4901 DECODE : S0(4); // any 3 decoders 4902 FPU : S3(2); 4903 %} 4904 4905 // Float reg-reg operation 4906 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 4907 instruction_count(4); 4908 dst : S4(write); 4909 src1 : S3(read); 4910 src2 : S3(read); 4911 src3 : S3(read); 4912 DECODE : S1(3); // any 3 decoders 4913 D0 : S0; // Big decoder only 4914 FPU : S3(2); 4915 MEM : S3; 4916 %} 4917 4918 // Float reg-mem operation 4919 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 4920 instruction_count(2); 4921 dst : S5(write); 4922 mem : S3(read); 4923 D0 : S0; // big decoder only 4924 DECODE : S1; // any decoder for FPU POP 4925 FPU : S4; 4926 MEM : S3; // any mem 4927 %} 4928 4929 // Float reg-mem operation 4930 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 4931 instruction_count(3); 4932 dst : S5(write); 4933 src1 : S3(read); 4934 mem : S3(read); 4935 D0 : S0; // big decoder only 4936 DECODE : S1(2); // any decoder for FPU POP 4937 FPU : S4; 4938 MEM : S3; // any mem 4939 %} 4940 4941 // Float mem-reg operation 4942 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 4943 
instruction_count(2); 4944 src : S5(read); 4945 mem : S3(read); 4946 DECODE : S0; // any decoder for FPU PUSH 4947 D0 : S1; // big decoder only 4948 FPU : S4; 4949 MEM : S3; // any mem 4950 %} 4951 4952 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4953 instruction_count(3); 4954 src1 : S3(read); 4955 src2 : S3(read); 4956 mem : S3(read); 4957 DECODE : S0(2); // any decoder for FPU PUSH 4958 D0 : S1; // big decoder only 4959 FPU : S4; 4960 MEM : S3; // any mem 4961 %} 4962 4963 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4964 instruction_count(3); 4965 src1 : S3(read); 4966 src2 : S3(read); 4967 mem : S4(read); 4968 DECODE : S0; // any decoder for FPU PUSH 4969 D0 : S0(2); // big decoder only 4970 FPU : S4; 4971 MEM : S3(2); // any mem 4972 %} 4973 4974 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4975 instruction_count(2); 4976 src1 : S3(read); 4977 dst : S4(read); 4978 D0 : S0(2); // big decoder only 4979 MEM : S3(2); // any mem 4980 %} 4981 4982 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4983 instruction_count(3); 4984 src1 : S3(read); 4985 src2 : S3(read); 4986 dst : S4(read); 4987 D0 : S0(3); // big decoder only 4988 FPU : S4; 4989 MEM : S3(3); // any mem 4990 %} 4991 4992 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4993 instruction_count(3); 4994 src1 : S4(read); 4995 mem : S4(read); 4996 DECODE : S0; // any decoder for FPU PUSH 4997 D0 : S0(2); // big decoder only 4998 FPU : S4; 4999 MEM : S3(2); // any mem 5000 %} 5001 5002 // Float load constant 5003 pipe_class fpu_reg_con(regDPR dst) %{ 5004 instruction_count(2); 5005 dst : S5(write); 5006 D0 : S0; // big decoder only for the load 5007 DECODE : S1; // any decoder for FPU POP 5008 FPU : S4; 5009 MEM : S3; // any mem 5010 %} 5011 5012 // Float load constant 5013 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5014 instruction_count(3); 5015 dst : S5(write); 5016 src : S3(read); 5017 D0 : S0; // big decoder only for 
the load 5018 DECODE : S1(2); // any decoder for FPU POP 5019 FPU : S4; 5020 MEM : S3; // any mem 5021 %} 5022 5023 // UnConditional branch 5024 pipe_class pipe_jmp( label labl ) %{ 5025 single_instruction; 5026 BR : S3; 5027 %} 5028 5029 // Conditional branch 5030 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5031 single_instruction; 5032 cr : S1(read); 5033 BR : S3; 5034 %} 5035 5036 // Allocation idiom 5037 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5038 instruction_count(1); force_serialization; 5039 fixed_latency(6); 5040 heap_ptr : S3(read); 5041 DECODE : S0(3); 5042 D0 : S2; 5043 MEM : S3; 5044 ALU : S3(2); 5045 dst : S5(write); 5046 BR : S5; 5047 %} 5048 5049 // Generic big/slow expanded idiom 5050 pipe_class pipe_slow( ) %{ 5051 instruction_count(10); multiple_bundles; force_serialization; 5052 fixed_latency(100); 5053 D0 : S0(2); 5054 MEM : S3(2); 5055 %} 5056 5057 // The real do-nothing guy 5058 pipe_class empty( ) %{ 5059 instruction_count(0); 5060 %} 5061 5062 // Define the class for the Nop node 5063 define %{ 5064 MachNop = empty; 5065 %} 5066 5067 %} 5068 5069 //----------INSTRUCTIONS------------------------------------------------------- 5070 // 5071 // match -- States which machine-independent subtree may be replaced 5072 // by this instruction. 5073 // ins_cost -- The estimated cost of this instruction is used by instruction 5074 // selection to identify a minimum cost tree of machine 5075 // instructions that matches a tree of machine-independent 5076 // instructions. 5077 // format -- A string providing the disassembly for this instruction. 5078 // The value of an instruction's operand may be inserted 5079 // by referring to it with a '$' prefix. 5080 // opcode -- Three instruction opcodes may be provided. These are referred 5081 // to within an encode class as $primary, $secondary, and $tertiary 5082 // respectively. 
//               The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
      "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
      "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
      "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
 ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
      "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe(
ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double via the x87 FPU stack (no SSE2 available):
// FLD the memory operand onto the stack, then pop it into $dst.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
// MOVSD form: also clears the upper half of the XMM register,
// avoiding a partial-register stall on some CPUs.
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Double to XMM, MOVLPD form (upper half of $dst left untouched).
// NOTE(review): movdbl() presumably selects MOVLPD vs MOVSD from
// UseXmmLoadAndClearUpper — confirm in the MacroAssembler.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float via the x87 FPU stack (SSE disabled).
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address — one instruct per addressing-mode flavor
// so the matcher can fold address arithmetic into a single LEA.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// LEA, base + 32-bit offset.
instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// LEA, base + index + offset.
instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// LEA, base + scaled index.
instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// LEA, base + scaled index + offset.
instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR reg,reg is shorter than MOV reg,0 but clobbers the flags,
// hence the KILL cr effect.
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

// Load pointer constant.
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);  /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant: two 32-bit immediate moves (lo then hi half).
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load long constant zero: XOR both halves (clobbers flags).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// Load x87 float constant from the constant table.
// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Load x87 float 0.0 via FLDZ (no constant-table access needed).
// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Load x87 float 1.0 via FLD1 (no constant-table access needed).
// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// Load SSE float constant from the constant table.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Load SSE float 0.0: XORPS self-zeroing avoids a memory access.
// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Load x87 double constant from the constant table.
// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Load x87 double 0.0 via FLDZ.
// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Load x87 double 1.0 via FLD1.
// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// Load SSE double constant from the constant table.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Load SSE double 0.0: XORPD self-zeroing avoids a memory access.
// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Integer from Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Long from Stack Slot: two 32-bit loads (lo then hi half).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Pointer from Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Float from Stack Slot, via the x87 FPU stack.
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double from Stack Slot, via the x87 FPU stack.
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

// No usable prefetch without SSE (unless PREFETCHW was requested):
// emit nothing.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// AllocatePrefetchInstr==3: 3DNow!/AMD PREFETCHW.
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// AllocatePrefetchInstr==0: PREFETCHNTA (non-temporal hint).
instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// AllocatePrefetchInstr==1: PREFETCHT0 (all cache levels).
instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

// AllocatePrefetchInstr==2: PREFETCHT2 (L2 and up).
instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
// src is xRegI — presumably restricted to registers with a byte
// encoding (EAX..EBX); confirm against the operand definition.
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short (16-bit, via operand-size prefix 0x66)
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long — two 32-bit stores; non-atomic, so only used when the
// node does not require atomic access (see predicate).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer: only the low word is stored.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.
// Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);  // CMP r32,r/m32 — the probe; flags clobbered, hence KILL cr
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long store with SSE2: bounce the 64-bit value
// through an XMM temporary (source is a stack slot).
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);                          // address probe
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);          // single 64-bit store
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long store with SSE2, register source: pack the
// two 32-bit halves into one XMM register, then store 64 bits at once.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);                          // address probe
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);  // lo|hi -> 64 bits
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double, x87 path: source must already be on the FPU
// top-of-stack (regDPR1).
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
// (the FST itself narrows from the 80-bit internal format).
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float, x87 path (SSE disabled); source on FPU top-of-stack.
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// Double-to-float conversion is folded into the 32-bit FST.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot (two 32-bit stores, lo then hi).
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

// Acquire barrier: no instruction needed on x86 (loads are not
// reordered with other loads), so the encoding is empty.
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Release barrier: also empty on x86 (stores are not reordered
// with earlier loads or stores).
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full fence: a locked add to the stack serves as StoreLoad barrier.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elided when a preceding store already provides the StoreLoad barrier.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------

// CastX2P: src and dst are both pinned to EAX, so no code is emitted.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// Fallback when CMOV is not supported: branch around a plain MOV.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned-compare variant of the branch-around fallback.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Carry-flag-only unsigned compare: delegates to the unsigned form.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move with a memory source.
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move, memory source, unsigned compare.
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move of a pointer.
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move of an x87 double (FCMOV, unsigned condition).
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move of an x87 float (FCMOV, unsigned condition).
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed case: branch around an FPU register-to-register move.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
// NOTE(review): the format string says "# float" although this is the
// double variant — cosmetic only; left untouched here.
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Conditional move of a long: one CMOV per 32-bit half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add immediate to register.
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add 1: use the one-byte INC encoding when UseIncDec allows it.
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA — does not touch the flags.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Three-operand pointer add via LEA — does not touch the flags.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add -1: use the one-byte DEC encoding when UseIncDec allows it.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add a loaded integer to a register.
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src"
%} 7108 opcode(0x03); 7109 ins_encode( OpcP, RegMem( dst, src) ); 7110 ins_pipe( ialu_reg_mem ); 7111 %} 7112 7113 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7114 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7115 effect(KILL cr); 7116 7117 ins_cost(150); 7118 format %{ "ADD $dst,$src" %} 7119 opcode(0x01); /* Opcode 01 /r */ 7120 ins_encode( OpcP, RegMem( src, dst ) ); 7121 ins_pipe( ialu_mem_reg ); 7122 %} 7123 7124 // Add Memory with Immediate 7125 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7126 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7127 effect(KILL cr); 7128 7129 ins_cost(125); 7130 format %{ "ADD $dst,$src" %} 7131 opcode(0x81); /* Opcode 81 /0 id */ 7132 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7133 ins_pipe( ialu_mem_imm ); 7134 %} 7135 7136 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7137 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7138 effect(KILL cr); 7139 7140 ins_cost(125); 7141 format %{ "INC $dst" %} 7142 opcode(0xFF); /* Opcode FF /0 */ 7143 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7144 ins_pipe( ialu_mem_imm ); 7145 %} 7146 7147 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7148 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7149 effect(KILL cr); 7150 7151 ins_cost(125); 7152 format %{ "DEC $dst" %} 7153 opcode(0xFF); /* Opcode FF /1 */ 7154 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7155 ins_pipe( ialu_mem_imm ); 7156 %} 7157 7158 7159 instruct checkCastPP( eRegP dst ) %{ 7160 match(Set dst (CheckCastPP dst)); 7161 7162 size(0); 7163 format %{ "#checkcastPP of $dst" %} 7164 ins_encode( /*empty encoding*/ ); 7165 ins_pipe( empty ); 7166 %} 7167 7168 instruct castPP( eRegP dst ) %{ 7169 match(Set dst (CastPP dst)); 7170 format %{ "#castPP of $dst" %} 7171 ins_encode( /*empty encoding*/ ); 7172 ins_pipe( empty ); 7173 %} 7174 7175 instruct castII( rRegI dst ) %{ 7176 match(Set dst (CastII dst)); 7177 format %{ "#castII of $dst" %} 
7178 ins_encode( /*empty encoding*/ ); 7179 ins_cost(0); 7180 ins_pipe( empty ); 7181 %} 7182 7183 7184 // Load-locked - same as a regular pointer load when used with compare-swap 7185 instruct loadPLocked(eRegP dst, memory mem) %{ 7186 match(Set dst (LoadPLocked mem)); 7187 7188 ins_cost(125); 7189 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7190 opcode(0x8B); 7191 ins_encode( OpcP, RegMem(dst,mem)); 7192 ins_pipe( ialu_reg_mem ); 7193 %} 7194 7195 // Conditional-store of the updated heap-top. 7196 // Used during allocation of the shared heap. 7197 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7198 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7199 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7200 // EAX is killed if there is contention, but then it's also unused. 7201 // In the common case of no contention, EAX holds the new oop address. 7202 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7203 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7204 ins_pipe( pipe_cmpxchg ); 7205 %} 7206 7207 // Conditional-store of an int value. 7208 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 7209 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ 7210 match(Set cr (StoreIConditional mem (Binary oldval newval))); 7211 effect(KILL oldval); 7212 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} 7213 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); 7214 ins_pipe( pipe_cmpxchg ); 7215 %} 7216 7217 // Conditional-store of a long value. 7218 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 
// Conditional-store of a long value: compares EDX:EAX (oldval) with the
// 64-bit value at $mem and, if equal, stores the new value held in
// ECX:EBX (newval).  Only the flags result (cr) is produced for the
// matcher; callers test ZF for success.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  // CMPXCHG8B rewrites EDX:EAX on failure, so oldval is clobbered.
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
        %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    if( os::is_MP() )
      __ lock();   // LOCK prefix only required on multiprocessor systems
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// Compare-and-swap of a long.  res is a boolean (1 on success, 0 on
// failure); requires CMPXCHG8B support (supports_cx8).
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Compare-and-swap of a pointer; res is a boolean result.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Compare-and-swap of an int; res is a boolean result.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Compare-and-exchange of a long: like compareAndSwapL but the result is
// the witnessed memory value, returned in oldval (EDX:EAX) rather than a
// boolean.  Requires CMPXCHG8B support.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Compare-and-exchange of a pointer; witnessed value returned in oldval (EAX).
instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Compare-and-exchange of an int; witnessed value returned in oldval (EAX).
instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI whose result is not used: a locked ADD of the immediate is
// sufficient, no XADD (and no result register) needed.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }   // LOCK prefix only needed on MP
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic get-and-add of an int: the old memory value comes back in newval.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }   // LOCK prefix only needed on MP
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic get-and-set of an int.  XCHG with a memory operand is implicitly
// locked on x86, so no explicit LOCK prefix (and no flags kill) is needed.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic get-and-set of a pointer (same implicit-lock XCHG as xchgI).
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract 8/32-bit immediate from register.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05);  /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract memory operand from register.
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Subtract register from memory (load/sub/store folded into one SUB).
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29);  /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Integer negate: 0 - dst matched to a single NEG.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03);  // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a long immediate into EAX only ("low word only" per the format);
// feeds the IMUL-high patterns below, which consume src2 via USE only.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Only applies when the multiplier is a constant long whose value fits
  // in a signed 32-bit range.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Same constant-multiplier constraint as mulI_imm_high.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned int * unsigned int -> long, expressed via masking with 0xffffffffL.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// The CMP/JNE prelude special-cases min_jint / -1, which would otherwise
// raise a hardware #DE overflow fault from IDIV.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    // Divide by |con| with unsigned DIV, then fix the sign at the end.
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    // Compute remainder against |con|; the remainder's sign follows the
    // dividend, fixed up via NEG/SAR below.
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4);  /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4);  /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
// Matched as a single sign-extending 16-bit move (MOVSX).
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable; shift count must be in ECX (CL)
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr); // logical ops clobber the condition codes

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  // OpcSErm picks the sign-extended 8-bit form when the immediate fits
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst
        (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2, matched from (src1 ^ -1) & src2.
// Guarded by UseBMI1Instructions since BMI1 is not available on all CPUs.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// ANDN with a memory operand for src2
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from (0 - src) & src
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// BLSI with a memory operand (the same load appears in both match inputs)
instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to (and including) lowest set bit,
// matched from (src + -1) ^ src
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// BLSMSK with a memory operand
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matched from (src + -1) & src
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// BLSR with a memory operand
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst
        (OrI dst src));
  effect(KILL cr); // logical ops clobber the condition codes

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or an int register with a pointer reinterpreted as an int (CastP2X)
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  // OpcSErm picks the sign-extended 8-bit form when the immediate fits
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand: helper instructions with no match rule of their own;
// they are only reachable through the expand %{ %} rules below.
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

// Variable rotate: count must be in ECX (CL); dst is any non-ECX register
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// The predicate checks that the two shift counts sum to 32 (mod 32),
// i.e. the or-of-shifts really is a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI
      dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand: helper instructions with no match rule of their own;
// they are only reachable through the expand %{ %} rules below.
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

// Variable rotate: count must be in ECX (CL); dst is any non-ECX register
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// The predicate checks that the two shift counts sum to 32 (mod 32),
// i.e. the or-of-shifts really is a rotate.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32,
                           eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr); // logical ops clobber the condition codes

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// XOR with all-ones is strength-reduced to NOT, which does not
// touch the flags (hence no KILL cr here).
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI
dst (XorI (LoadI dst) src))); 8461 effect(KILL cr); 8462 8463 ins_cost(125); 8464 format %{ "XOR $dst,$src" %} 8465 opcode(0x81,0x6); /* Opcode 81 /6 id */ 8466 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8467 ins_pipe( ialu_mem_imm ); 8468 %} 8469 8470 //----------Convert Int to Boolean--------------------------------------------- 8471 8472 instruct movI_nocopy(rRegI dst, rRegI src) %{ 8473 effect( DEF dst, USE src ); 8474 format %{ "MOV $dst,$src" %} 8475 ins_encode( enc_Copy( dst, src) ); 8476 ins_pipe( ialu_reg_reg ); 8477 %} 8478 8479 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8480 effect( USE_DEF dst, USE src, KILL cr ); 8481 8482 size(4); 8483 format %{ "NEG $dst\n\t" 8484 "ADC $dst,$src" %} 8485 ins_encode( neg_reg(dst), 8486 OpcRegReg(0x13,dst,src) ); 8487 ins_pipe( ialu_reg_reg_long ); 8488 %} 8489 8490 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8491 match(Set dst (Conv2B src)); 8492 8493 expand %{ 8494 movI_nocopy(dst,src); 8495 ci2b(dst,src,cr); 8496 %} 8497 %} 8498 8499 instruct movP_nocopy(rRegI dst, eRegP src) %{ 8500 effect( DEF dst, USE src ); 8501 format %{ "MOV $dst,$src" %} 8502 ins_encode( enc_Copy( dst, src) ); 8503 ins_pipe( ialu_reg_reg ); 8504 %} 8505 8506 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8507 effect( USE_DEF dst, USE src, KILL cr ); 8508 format %{ "NEG $dst\n\t" 8509 "ADC $dst,$src" %} 8510 ins_encode( neg_reg(dst), 8511 OpcRegReg(0x13,dst,src) ); 8512 ins_pipe( ialu_reg_reg_long ); 8513 %} 8514 8515 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8516 match(Set dst (Conv2B src)); 8517 8518 expand %{ 8519 movP_nocopy(dst,src); 8520 cp2b(dst,src,cr); 8521 %} 8522 %} 8523 8524 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ 8525 match(Set dst (CmpLTMask p q)); 8526 effect(KILL cr); 8527 ins_cost(400); 8528 8529 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 8530 format %{ "XOR $dst,$dst\n\t" 8531 "CMP 
$p,$q\n\t" 8532 "SETlt $dst\n\t" 8533 "NEG $dst" %} 8534 ins_encode %{ 8535 Register Rp = $p$$Register; 8536 Register Rq = $q$$Register; 8537 Register Rd = $dst$$Register; 8538 Label done; 8539 __ xorl(Rd, Rd); 8540 __ cmpl(Rp, Rq); 8541 __ setb(Assembler::less, Rd); 8542 __ negl(Rd); 8543 %} 8544 8545 ins_pipe(pipe_slow); 8546 %} 8547 8548 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{ 8549 match(Set dst (CmpLTMask dst zero)); 8550 effect(DEF dst, KILL cr); 8551 ins_cost(100); 8552 8553 format %{ "SAR $dst,31\t# cmpLTMask0" %} 8554 ins_encode %{ 8555 __ sarl($dst$$Register, 31); 8556 %} 8557 ins_pipe(ialu_reg); 8558 %} 8559 8560 /* better to save a register than avoid a branch */ 8561 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8562 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); 8563 effect(KILL cr); 8564 ins_cost(400); 8565 format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" 8566 "JGE done\n\t" 8567 "ADD $p,$y\n" 8568 "done: " %} 8569 ins_encode %{ 8570 Register Rp = $p$$Register; 8571 Register Rq = $q$$Register; 8572 Register Ry = $y$$Register; 8573 Label done; 8574 __ subl(Rp, Rq); 8575 __ jccb(Assembler::greaterEqual, done); 8576 __ addl(Rp, Ry); 8577 __ bind(done); 8578 %} 8579 8580 ins_pipe(pipe_cmplt); 8581 %} 8582 8583 /* better to save a register than avoid a branch */ 8584 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8585 match(Set y (AndI (CmpLTMask p q) y)); 8586 effect(KILL cr); 8587 8588 ins_cost(300); 8589 8590 format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" 8591 "JLT done\n\t" 8592 "XORL $y, $y\n" 8593 "done: " %} 8594 ins_encode %{ 8595 Register Rp = $p$$Register; 8596 Register Rq = $q$$Register; 8597 Register Ry = $y$$Register; 8598 Label done; 8599 __ cmpl(Rp, Rq); 8600 __ jccb(Assembler::less, done); 8601 __ xorl(Ry, Ry); 8602 __ bind(done); 8603 %} 8604 8605 ins_pipe(pipe_cmplt); 8606 %} 8607 8608 /* If I enable this, I encourage spilling in the inner loop of compress. 
   instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
     match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These produce the flags register directly (Set cr ...); the matching
// branch consumes the overflow flag. op1 is USE_KILLed because the ADD
// destroys it.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtract overflow check: CMP sets the same flags as SUB
// without destroying op1, so no USE_KILL is needed.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Negate overflow check: 0 - op2 encoded as NEG, which destroys op2
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Multiply-by-immediate overflow check uses the three-operand IMUL form
// into a TEMP register so the inputs are preserved.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// On 32-bit x86 a long lives in a register pair ($dst.lo / $dst.hi);
// arithmetic is done as a low-word op followed by a carry-propagating
// high-word op (ADD/ADC, SUB/SBB, ...).

// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
// Low words via SUB, high words via SBB to propagate the borrow.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long: matched from (0 - dst)
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL
         cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long ANDN: two 32-bit ANDNs, one per register-pair half.
// TEMP dst keeps the allocator from aliasing dst with the sources.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

// Long ANDN with a memory operand; the high word is addressed at disp+4
instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst),
             HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSI (isolate lowest set bit): if the low-word BLSI result is
// non-zero the lowest set bit is in the low word, so the high word of
// the result stays 0 and the high-word BLSI is skipped.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long BLSI with a memory operand; high word addressed at disp+4
instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSMSK (mask up to lowest set bit): the carry flag from the
// low-word BLSMSK tells whether the mask extends into the high word.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// Long BLSMSK with a memory operand; high word addressed at disp+4
instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Long BLSR (reset lowest set bit): the carry flag from the low-word
// BLSR tells whether the cleared bit was in the high word instead.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// Long BLSR with a memory operand; high word addressed at disp+4
instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL
        (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
// Bitwise long ops need no carry: each half is a plain 32-bit OR.
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// XOR with all-ones is strength-reduced to NOT on each half;
// NOT does not touch the flags, hence no KILL cr.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// Implemented as ADD/ADC (double = shift left by one across the pair);
// only selected when UseNewLongLShift is on.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
predicate(UseNewLongLShift); 9114 match(Set dst (LShiftL dst cnt)); 9115 effect(KILL cr); 9116 ins_cost(100); 9117 format %{ "ADD $dst.lo,$dst.lo\n\t" 9118 "ADC $dst.hi,$dst.hi\n\t" 9119 "ADD $dst.lo,$dst.lo\n\t" 9120 "ADC $dst.hi,$dst.hi" %} 9121 ins_encode %{ 9122 __ addl($dst$$Register,$dst$$Register); 9123 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9124 __ addl($dst$$Register,$dst$$Register); 9125 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9126 %} 9127 ins_pipe( ialu_reg_long ); 9128 %} 9129 9130 // Shift Left Long by 3 9131 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9132 predicate(UseNewLongLShift); 9133 match(Set dst (LShiftL dst cnt)); 9134 effect(KILL cr); 9135 ins_cost(100); 9136 format %{ "ADD $dst.lo,$dst.lo\n\t" 9137 "ADC $dst.hi,$dst.hi\n\t" 9138 "ADD $dst.lo,$dst.lo\n\t" 9139 "ADC $dst.hi,$dst.hi\n\t" 9140 "ADD $dst.lo,$dst.lo\n\t" 9141 "ADC $dst.hi,$dst.hi" %} 9142 ins_encode %{ 9143 __ addl($dst$$Register,$dst$$Register); 9144 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9145 __ addl($dst$$Register,$dst$$Register); 9146 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9147 __ addl($dst$$Register,$dst$$Register); 9148 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9149 %} 9150 ins_pipe( ialu_reg_long ); 9151 %} 9152 9153 // Shift Left Long by 1-31 9154 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9155 match(Set dst (LShiftL dst cnt)); 9156 effect(KILL cr); 9157 ins_cost(200); 9158 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9159 "SHL $dst.lo,$cnt" %} 9160 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9161 ins_encode( move_long_small_shift(dst,cnt) ); 9162 ins_pipe( ialu_reg_long ); 9163 %} 9164 9165 // Shift Left Long by 32-63 9166 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9167 match(Set dst (LShiftL dst cnt)); 9168 effect(KILL cr); 9169 ins_cost(300); 9170 
format %{ "MOV $dst.hi,$dst.lo\n" 9171 "\tSHL $dst.hi,$cnt-32\n" 9172 "\tXOR $dst.lo,$dst.lo" %} 9173 opcode(0xC1, 0x4); /* C1 /4 ib */ 9174 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9175 ins_pipe( ialu_reg_long ); 9176 %} 9177 9178 // Shift Left Long by variable 9179 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9180 match(Set dst (LShiftL dst shift)); 9181 effect(KILL cr); 9182 ins_cost(500+200); 9183 size(17); 9184 format %{ "TEST $shift,32\n\t" 9185 "JEQ,s small\n\t" 9186 "MOV $dst.hi,$dst.lo\n\t" 9187 "XOR $dst.lo,$dst.lo\n" 9188 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9189 "SHL $dst.lo,$shift" %} 9190 ins_encode( shift_left_long( dst, shift ) ); 9191 ins_pipe( pipe_slow ); 9192 %} 9193 9194 // Shift Right Long by 1-31 9195 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9196 match(Set dst (URShiftL dst cnt)); 9197 effect(KILL cr); 9198 ins_cost(200); 9199 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9200 "SHR $dst.hi,$cnt" %} 9201 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9202 ins_encode( move_long_small_shift(dst,cnt) ); 9203 ins_pipe( ialu_reg_long ); 9204 %} 9205 9206 // Shift Right Long by 32-63 9207 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9208 match(Set dst (URShiftL dst cnt)); 9209 effect(KILL cr); 9210 ins_cost(300); 9211 format %{ "MOV $dst.lo,$dst.hi\n" 9212 "\tSHR $dst.lo,$cnt-32\n" 9213 "\tXOR $dst.hi,$dst.hi" %} 9214 opcode(0xC1, 0x5); /* C1 /5 ib */ 9215 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9216 ins_pipe( ialu_reg_long ); 9217 %} 9218 9219 // Shift Right Long by variable 9220 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9221 match(Set dst (URShiftL dst shift)); 9222 effect(KILL cr); 9223 ins_cost(600); 9224 size(17); 9225 format %{ "TEST $shift,32\n\t" 9226 "JEQ,s small\n\t" 9227 "MOV $dst.lo,$dst.hi\n\t" 9228 "XOR $dst.hi,$dst.hi\n" 9229 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9230 "SHR $dst.hi,$shift" %} 9231 ins_encode( 
shift_right_long( dst, shift ) ); 9232 ins_pipe( pipe_slow ); 9233 %} 9234 9235 // Shift Right Long by 1-31 9236 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9237 match(Set dst (RShiftL dst cnt)); 9238 effect(KILL cr); 9239 ins_cost(200); 9240 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9241 "SAR $dst.hi,$cnt" %} 9242 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9243 ins_encode( move_long_small_shift(dst,cnt) ); 9244 ins_pipe( ialu_reg_long ); 9245 %} 9246 9247 // Shift Right Long by 32-63 9248 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9249 match(Set dst (RShiftL dst cnt)); 9250 effect(KILL cr); 9251 ins_cost(300); 9252 format %{ "MOV $dst.lo,$dst.hi\n" 9253 "\tSAR $dst.lo,$cnt-32\n" 9254 "\tSAR $dst.hi,31" %} 9255 opcode(0xC1, 0x7); /* C1 /7 ib */ 9256 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9257 ins_pipe( ialu_reg_long ); 9258 %} 9259 9260 // Shift Right arithmetic Long by variable 9261 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9262 match(Set dst (RShiftL dst shift)); 9263 effect(KILL cr); 9264 ins_cost(600); 9265 size(18); 9266 format %{ "TEST $shift,32\n\t" 9267 "JEQ,s small\n\t" 9268 "MOV $dst.lo,$dst.hi\n\t" 9269 "SAR $dst.hi,31\n" 9270 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9271 "SAR $dst.hi,$shift" %} 9272 ins_encode( shift_right_arith_long( dst, shift ) ); 9273 ins_pipe( pipe_slow ); 9274 %} 9275 9276 9277 //----------Double Instructions------------------------------------------------ 9278 // Double Math 9279 9280 // Compare & branch 9281 9282 // P6 version of float compare, sets condition codes in EFLAGS 9283 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9284 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9285 match(Set cr (CmpD src1 src2)); 9286 effect(KILL rax); 9287 ins_cost(150); 9288 format %{ "FLD $src1\n\t" 9289 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9290 "JNP exit\n\t" 9291 "MOV ah,1 // saw a NaN, set CF\n\t" 9292 
"SAHF\n" 9293 "exit:\tNOP // avoid branch to branch" %} 9294 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9295 ins_encode( Push_Reg_DPR(src1), 9296 OpcP, RegOpc(src2), 9297 cmpF_P6_fixup ); 9298 ins_pipe( pipe_slow ); 9299 %} 9300 9301 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9302 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9303 match(Set cr (CmpD src1 src2)); 9304 ins_cost(150); 9305 format %{ "FLD $src1\n\t" 9306 "FUCOMIP ST,$src2 // P6 instruction" %} 9307 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9308 ins_encode( Push_Reg_DPR(src1), 9309 OpcP, RegOpc(src2)); 9310 ins_pipe( pipe_slow ); 9311 %} 9312 9313 // Compare & branch 9314 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9315 predicate(UseSSE<=1); 9316 match(Set cr (CmpD src1 src2)); 9317 effect(KILL rax); 9318 ins_cost(200); 9319 format %{ "FLD $src1\n\t" 9320 "FCOMp $src2\n\t" 9321 "FNSTSW AX\n\t" 9322 "TEST AX,0x400\n\t" 9323 "JZ,s flags\n\t" 9324 "MOV AH,1\t# unordered treat as LT\n" 9325 "flags:\tSAHF" %} 9326 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9327 ins_encode( Push_Reg_DPR(src1), 9328 OpcP, RegOpc(src2), 9329 fpu_flags); 9330 ins_pipe( pipe_slow ); 9331 %} 9332 9333 // Compare vs zero into -1,0,1 9334 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9335 predicate(UseSSE<=1); 9336 match(Set dst (CmpD3 src1 zero)); 9337 effect(KILL cr, KILL rax); 9338 ins_cost(280); 9339 format %{ "FTSTD $dst,$src1" %} 9340 opcode(0xE4, 0xD9); 9341 ins_encode( Push_Reg_DPR(src1), 9342 OpcS, OpcP, PopFPU, 9343 CmpF_Result(dst)); 9344 ins_pipe( pipe_slow ); 9345 %} 9346 9347 // Compare into -1,0,1 9348 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9349 predicate(UseSSE<=1); 9350 match(Set dst (CmpD3 src1 src2)); 9351 effect(KILL cr, KILL rax); 9352 ins_cost(300); 9353 format %{ "FCMPD $dst,$src1,$src2" %} 9354 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9355 ins_encode( 
Push_Reg_DPR(src1), 9356 OpcP, RegOpc(src2), 9357 CmpF_Result(dst)); 9358 ins_pipe( pipe_slow ); 9359 %} 9360 9361 // float compare and set condition codes in EFLAGS by XMM regs 9362 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9363 predicate(UseSSE>=2); 9364 match(Set cr (CmpD src1 src2)); 9365 ins_cost(145); 9366 format %{ "UCOMISD $src1,$src2\n\t" 9367 "JNP,s exit\n\t" 9368 "PUSHF\t# saw NaN, set CF\n\t" 9369 "AND [rsp], #0xffffff2b\n\t" 9370 "POPF\n" 9371 "exit:" %} 9372 ins_encode %{ 9373 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9374 emit_cmpfp_fixup(_masm); 9375 %} 9376 ins_pipe( pipe_slow ); 9377 %} 9378 9379 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9380 predicate(UseSSE>=2); 9381 match(Set cr (CmpD src1 src2)); 9382 ins_cost(100); 9383 format %{ "UCOMISD $src1,$src2" %} 9384 ins_encode %{ 9385 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9386 %} 9387 ins_pipe( pipe_slow ); 9388 %} 9389 9390 // float compare and set condition codes in EFLAGS by XMM regs 9391 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9392 predicate(UseSSE>=2); 9393 match(Set cr (CmpD src1 (LoadD src2))); 9394 ins_cost(145); 9395 format %{ "UCOMISD $src1,$src2\n\t" 9396 "JNP,s exit\n\t" 9397 "PUSHF\t# saw NaN, set CF\n\t" 9398 "AND [rsp], #0xffffff2b\n\t" 9399 "POPF\n" 9400 "exit:" %} 9401 ins_encode %{ 9402 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9403 emit_cmpfp_fixup(_masm); 9404 %} 9405 ins_pipe( pipe_slow ); 9406 %} 9407 9408 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9409 predicate(UseSSE>=2); 9410 match(Set cr (CmpD src1 (LoadD src2))); 9411 ins_cost(100); 9412 format %{ "UCOMISD $src1,$src2" %} 9413 ins_encode %{ 9414 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9415 %} 9416 ins_pipe( pipe_slow ); 9417 %} 9418 9419 // Compare into -1,0,1 in XMM 9420 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9421 predicate(UseSSE>=2); 9422 match(Set dst (CmpD3 src1 src2)); 
9423 effect(KILL cr); 9424 ins_cost(255); 9425 format %{ "UCOMISD $src1, $src2\n\t" 9426 "MOV $dst, #-1\n\t" 9427 "JP,s done\n\t" 9428 "JB,s done\n\t" 9429 "SETNE $dst\n\t" 9430 "MOVZB $dst, $dst\n" 9431 "done:" %} 9432 ins_encode %{ 9433 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9434 emit_cmpfp3(_masm, $dst$$Register); 9435 %} 9436 ins_pipe( pipe_slow ); 9437 %} 9438 9439 // Compare into -1,0,1 in XMM and memory 9440 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9441 predicate(UseSSE>=2); 9442 match(Set dst (CmpD3 src1 (LoadD src2))); 9443 effect(KILL cr); 9444 ins_cost(275); 9445 format %{ "UCOMISD $src1, $src2\n\t" 9446 "MOV $dst, #-1\n\t" 9447 "JP,s done\n\t" 9448 "JB,s done\n\t" 9449 "SETNE $dst\n\t" 9450 "MOVZB $dst, $dst\n" 9451 "done:" %} 9452 ins_encode %{ 9453 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9454 emit_cmpfp3(_masm, $dst$$Register); 9455 %} 9456 ins_pipe( pipe_slow ); 9457 %} 9458 9459 9460 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9461 predicate (UseSSE <=1); 9462 match(Set dst (SubD dst src)); 9463 9464 format %{ "FLD $src\n\t" 9465 "DSUBp $dst,ST" %} 9466 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9467 ins_cost(150); 9468 ins_encode( Push_Reg_DPR(src), 9469 OpcP, RegOpc(dst) ); 9470 ins_pipe( fpu_reg_reg ); 9471 %} 9472 9473 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9474 predicate (UseSSE <=1); 9475 match(Set dst (RoundDouble (SubD src1 src2))); 9476 ins_cost(250); 9477 9478 format %{ "FLD $src2\n\t" 9479 "DSUB ST,$src1\n\t" 9480 "FSTP_D $dst\t# D-round" %} 9481 opcode(0xD8, 0x5); 9482 ins_encode( Push_Reg_DPR(src2), 9483 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9484 ins_pipe( fpu_mem_reg_reg ); 9485 %} 9486 9487 9488 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9489 predicate (UseSSE <=1); 9490 match(Set dst (SubD dst (LoadD src))); 9491 ins_cost(150); 9492 9493 format %{ "FLD $src\n\t" 9494 "DSUBp $dst,ST" %} 9495 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9496 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9497 OpcP, RegOpc(dst) ); 9498 ins_pipe( fpu_reg_mem ); 9499 %} 9500 9501 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9502 predicate (UseSSE<=1); 9503 match(Set dst (AbsD src)); 9504 ins_cost(100); 9505 format %{ "FABS" %} 9506 opcode(0xE1, 0xD9); 9507 ins_encode( OpcS, OpcP ); 9508 ins_pipe( fpu_reg_reg ); 9509 %} 9510 9511 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9512 predicate(UseSSE<=1); 9513 match(Set dst (NegD src)); 9514 ins_cost(100); 9515 format %{ "FCHS" %} 9516 opcode(0xE0, 0xD9); 9517 ins_encode( OpcS, OpcP ); 9518 ins_pipe( fpu_reg_reg ); 9519 %} 9520 9521 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9522 predicate(UseSSE<=1); 9523 match(Set dst (AddD dst src)); 9524 format %{ "FLD $src\n\t" 9525 "DADD $dst,ST" %} 9526 size(4); 9527 ins_cost(150); 9528 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9529 ins_encode( Push_Reg_DPR(src), 9530 OpcP, RegOpc(dst) ); 9531 ins_pipe( fpu_reg_reg ); 9532 %} 9533 9534 9535 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9536 predicate(UseSSE<=1); 9537 match(Set dst (RoundDouble (AddD src1 src2))); 9538 ins_cost(250); 9539 9540 format %{ "FLD $src2\n\t" 9541 "DADD ST,$src1\n\t" 9542 "FSTP_D $dst\t# D-round" %} 9543 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9544 ins_encode( Push_Reg_DPR(src2), 9545 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9546 ins_pipe( fpu_mem_reg_reg ); 9547 %} 9548 9549 9550 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9551 predicate(UseSSE<=1); 9552 match(Set dst (AddD dst (LoadD src))); 9553 ins_cost(150); 9554 9555 format %{ "FLD $src\n\t" 9556 "DADDp $dst,ST" %} 9557 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9558 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9559 OpcP, RegOpc(dst) ); 9560 ins_pipe( fpu_reg_mem ); 9561 %} 9562 9563 // add-to-memory 9564 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9565 predicate(UseSSE<=1); 9566 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9567 ins_cost(150); 9568 9569 format %{ "FLD_D $dst\n\t" 9570 "DADD ST,$src\n\t" 9571 "FST_D $dst" %} 9572 opcode(0xDD, 0x0); 9573 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9574 Opcode(0xD8), RegOpc(src), 9575 set_instruction_start, 9576 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9577 ins_pipe( fpu_reg_mem ); 9578 %} 9579 9580 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9581 predicate(UseSSE<=1); 9582 match(Set dst (AddD dst con)); 9583 ins_cost(125); 9584 format %{ "FLD1\n\t" 9585 "DADDp $dst,ST" %} 9586 ins_encode %{ 9587 __ fld1(); 9588 __ faddp($dst$$reg); 9589 %} 9590 ins_pipe(fpu_reg); 9591 %} 9592 9593 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9594 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9595 match(Set dst (AddD dst con)); 9596 ins_cost(200); 9597 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9598 "DADDp $dst,ST" %} 9599 ins_encode %{ 9600 __ fld_d($constantaddress($con)); 9601 __ faddp($dst$$reg); 9602 %} 9603 ins_pipe(fpu_reg_mem); 9604 %} 9605 9606 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9607 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9608 match(Set dst (RoundDouble (AddD src con))); 9609 ins_cost(200); 9610 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9611 "DADD ST,$src\n\t" 9612 "FSTP_D $dst\t# D-round" %} 9613 ins_encode %{ 9614 __ fld_d($constantaddress($con)); 9615 __ fadd($src$$reg); 9616 __ fstp_d(Address(rsp, $dst$$disp)); 9617 %} 9618 ins_pipe(fpu_mem_reg_con); 9619 %} 9620 9621 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9622 predicate(UseSSE<=1); 9623 match(Set dst (MulD dst src)); 9624 format %{ "FLD $src\n\t" 9625 "DMULp $dst,ST" %} 9626 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9627 ins_cost(150); 9628 ins_encode( Push_Reg_DPR(src), 9629 OpcP, RegOpc(dst) ); 9630 ins_pipe( 
fpu_reg_reg ); 9631 %} 9632 9633 // Strict FP instruction biases argument before multiply then 9634 // biases result to avoid double rounding of subnormals. 9635 // 9636 // scale arg1 by multiplying arg1 by 2^(-15360) 9637 // load arg2 9638 // multiply scaled arg1 by arg2 9639 // rescale product by 2^(15360) 9640 // 9641 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9642 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9643 match(Set dst (MulD dst src)); 9644 ins_cost(1); // Select this instruction for all strict FP double multiplies 9645 9646 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9647 "DMULp $dst,ST\n\t" 9648 "FLD $src\n\t" 9649 "DMULp $dst,ST\n\t" 9650 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9651 "DMULp $dst,ST\n\t" %} 9652 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9653 ins_encode( strictfp_bias1(dst), 9654 Push_Reg_DPR(src), 9655 OpcP, RegOpc(dst), 9656 strictfp_bias2(dst) ); 9657 ins_pipe( fpu_reg_reg ); 9658 %} 9659 9660 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9661 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9662 match(Set dst (MulD dst con)); 9663 ins_cost(200); 9664 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9665 "DMULp $dst,ST" %} 9666 ins_encode %{ 9667 __ fld_d($constantaddress($con)); 9668 __ fmulp($dst$$reg); 9669 %} 9670 ins_pipe(fpu_reg_mem); 9671 %} 9672 9673 9674 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9675 predicate( UseSSE<=1 ); 9676 match(Set dst (MulD dst (LoadD src))); 9677 ins_cost(200); 9678 format %{ "FLD_D $src\n\t" 9679 "DMULp $dst,ST" %} 9680 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9681 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9682 OpcP, RegOpc(dst) ); 9683 ins_pipe( fpu_reg_mem ); 9684 %} 9685 9686 // 9687 // Cisc-alternate to reg-reg multiply 9688 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9689 predicate( UseSSE<=1 ); 9690 match(Set dst (MulD src (LoadD mem))); 9691 ins_cost(250); 9692 format %{ "FLD_D $mem\n\t" 9693 "DMUL ST,$src\n\t" 9694 "FSTP_D $dst" %} 9695 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9696 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9697 OpcReg_FPR(src), 9698 Pop_Reg_DPR(dst) ); 9699 ins_pipe( fpu_reg_reg_mem ); 9700 %} 9701 9702 9703 // MACRO3 -- addDPR a mulDPR 9704 // This instruction is a '2-address' instruction in that the result goes 9705 // back to src2. This eliminates a move from the macro; possibly the 9706 // register allocator will have to add it back (and maybe not). 9707 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9708 predicate( UseSSE<=1 ); 9709 match(Set src2 (AddD (MulD src0 src1) src2)); 9710 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9711 "DMUL ST,$src1\n\t" 9712 "DADDp $src2,ST" %} 9713 ins_cost(250); 9714 opcode(0xDD); /* LoadD DD /0 */ 9715 ins_encode( Push_Reg_FPR(src0), 9716 FMul_ST_reg(src1), 9717 FAddP_reg_ST(src2) ); 9718 ins_pipe( fpu_reg_reg_reg ); 9719 %} 9720 9721 9722 // MACRO3 -- subDPR a mulDPR 9723 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9724 predicate( UseSSE<=1 ); 9725 match(Set src2 (SubD (MulD src0 src1) src2)); 9726 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9727 "DMUL ST,$src1\n\t" 9728 "DSUBRp $src2,ST" %} 9729 ins_cost(250); 9730 ins_encode( Push_Reg_FPR(src0), 9731 FMul_ST_reg(src1), 9732 Opcode(0xDE), Opc_plus(0xE0,src2)); 9733 ins_pipe( fpu_reg_reg_reg ); 9734 %} 9735 9736 9737 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9738 predicate( UseSSE<=1 ); 9739 match(Set dst (DivD dst src)); 9740 9741 format %{ "FLD $src\n\t" 9742 "FDIVp $dst,ST" %} 9743 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9744 ins_cost(150); 9745 ins_encode( Push_Reg_DPR(src), 9746 OpcP, RegOpc(dst) ); 9747 ins_pipe( fpu_reg_reg ); 9748 %} 9749 9750 // Strict FP instruction biases argument before division then 9751 // biases 
result, to avoid double rounding of subnormals. 9752 // 9753 // scale dividend by multiplying dividend by 2^(-15360) 9754 // load divisor 9755 // divide scaled dividend by divisor 9756 // rescale quotient by 2^(15360) 9757 // 9758 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9759 match(Set dst (DivD dst src)); 9761 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9762 ins_cost(01); 9763 9764 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9765 "DMULp $dst,ST\n\t" 9766 "FLD $src\n\t" 9767 "FDIVp $dst,ST\n\t" 9768 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9769 "DMULp $dst,ST\n\t" %} 9770 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9771 ins_encode( strictfp_bias1(dst), 9772 Push_Reg_DPR(src), 9773 OpcP, RegOpc(dst), 9774 strictfp_bias2(dst) ); 9775 ins_pipe( fpu_reg_reg ); 9776 %} 9777 9778 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9779 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 9780 match(Set dst (RoundDouble (DivD src1 src2))); 9781 9782 format %{ "FLD $src1\n\t" 9783 "FDIV ST,$src2\n\t" 9784 "FSTP_D $dst\t# D-round" %} 9785 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 9786 ins_encode( Push_Reg_DPR(src1), 9787 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); 9788 ins_pipe( fpu_mem_reg_reg ); 9789 %} 9790 9791 9792 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 9793 predicate(UseSSE<=1); 9794 match(Set dst (ModD dst src)); 9795 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 9796 9797 format %{ "DMOD $dst,$src" %} 9798 ins_cost(250); 9799 ins_encode(Push_Reg_Mod_DPR(dst, src), 9800 emitModDPR(), 9801 Push_Result_Mod_DPR(src), 9802 Pop_Reg_DPR(dst)); 9803 ins_pipe( pipe_slow ); 9804 %} 9805 9806 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 9807 predicate(UseSSE>=2); 9808 match(Set dst (ModD src0 src1));
9809 effect(KILL rax, KILL cr); 9810 9811 format %{ "SUB ESP,8\t # DMOD\n" 9812 "\tMOVSD [ESP+0],$src1\n" 9813 "\tFLD_D [ESP+0]\n" 9814 "\tMOVSD [ESP+0],$src0\n" 9815 "\tFLD_D [ESP+0]\n" 9816 "loop:\tFPREM\n" 9817 "\tFWAIT\n" 9818 "\tFNSTSW AX\n" 9819 "\tSAHF\n" 9820 "\tJP loop\n" 9821 "\tFSTP_D [ESP+0]\n" 9822 "\tMOVSD $dst,[ESP+0]\n" 9823 "\tADD ESP,8\n" 9824 "\tFSTP ST0\t # Restore FPU Stack" 9825 %} 9826 ins_cost(250); 9827 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9828 ins_pipe( pipe_slow ); 9829 %} 9830 9831 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9832 predicate (UseSSE<=1); 9833 match(Set dst(AtanD dst src)); 9834 format %{ "DATA $dst,$src" %} 9835 opcode(0xD9, 0xF3); 9836 ins_encode( Push_Reg_DPR(src), 9837 OpcP, OpcS, RegOpc(dst) ); 9838 ins_pipe( pipe_slow ); 9839 %} 9840 9841 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9842 predicate (UseSSE>=2); 9843 match(Set dst(AtanD dst src)); 9844 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9845 format %{ "DATA $dst,$src" %} 9846 opcode(0xD9, 0xF3); 9847 ins_encode( Push_SrcD(src), 9848 OpcP, OpcS, Push_ResultD(dst) ); 9849 ins_pipe( pipe_slow ); 9850 %} 9851 9852 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9853 predicate (UseSSE<=1); 9854 match(Set dst (SqrtD src)); 9855 format %{ "DSQRT $dst,$src" %} 9856 opcode(0xFA, 0xD9); 9857 ins_encode( Push_Reg_DPR(src), 9858 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9859 ins_pipe( pipe_slow ); 9860 %} 9861 9862 //-------------Float Instructions------------------------------- 9863 // Float Math 9864 9865 // Code for float compare: 9866 // fcompp(); 9867 // fwait(); fnstsw_ax(); 9868 // sahf(); 9869 // movl(dst, unordered_result); 9870 // jcc(Assembler::parity, exit); 9871 // movl(dst, less_result); 9872 // jcc(Assembler::below, exit); 9873 // movl(dst, equal_result); 9874 // jcc(Assembler::equal, exit); 9875 // movl(dst, greater_result); 9876 // exit: 9877 9878 // P6 version of float compare, sets 
condition codes in EFLAGS 9879 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9880 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9881 match(Set cr (CmpF src1 src2)); 9882 effect(KILL rax); 9883 ins_cost(150); 9884 format %{ "FLD $src1\n\t" 9885 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9886 "JNP exit\n\t" 9887 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 9888 "SAHF\n" 9889 "exit:\tNOP // avoid branch to branch" %} 9890 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9891 ins_encode( Push_Reg_DPR(src1), 9892 OpcP, RegOpc(src2), 9893 cmpF_P6_fixup ); 9894 ins_pipe( pipe_slow ); 9895 %} 9896 9897 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 9898 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9899 match(Set cr (CmpF src1 src2)); 9900 ins_cost(100); 9901 format %{ "FLD $src1\n\t" 9902 "FUCOMIP ST,$src2 // P6 instruction" %} 9903 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9904 ins_encode( Push_Reg_DPR(src1), 9905 OpcP, RegOpc(src2)); 9906 ins_pipe( pipe_slow ); 9907 %} 9908 9909 9910 // Compare & branch 9911 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9912 predicate(UseSSE == 0); 9913 match(Set cr (CmpF src1 src2)); 9914 effect(KILL rax); 9915 ins_cost(200); 9916 format %{ "FLD $src1\n\t" 9917 "FCOMp $src2\n\t" 9918 "FNSTSW AX\n\t" 9919 "TEST AX,0x400\n\t" 9920 "JZ,s flags\n\t" 9921 "MOV AH,1\t# unordered treat as LT\n" 9922 "flags:\tSAHF" %} 9923 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9924 ins_encode( Push_Reg_DPR(src1), 9925 OpcP, RegOpc(src2), 9926 fpu_flags); 9927 ins_pipe( pipe_slow ); 9928 %} 9929 9930 // Compare vs zero into -1,0,1 9931 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9932 predicate(UseSSE == 0); 9933 match(Set dst (CmpF3 src1 zero)); 9934 effect(KILL cr, KILL rax); 9935 ins_cost(280); 9936 format %{ "FTSTF $dst,$src1" %} 9937 opcode(0xE4, 0xD9); 9938 ins_encode( Push_Reg_DPR(src1), 9939 OpcS, OpcP, 
PopFPU, 9940 CmpF_Result(dst)); 9941 ins_pipe( pipe_slow ); 9942 %} 9943 9944 // Compare into -1,0,1 9945 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 9946 predicate(UseSSE == 0); 9947 match(Set dst (CmpF3 src1 src2)); 9948 effect(KILL cr, KILL rax); 9949 ins_cost(300); 9950 format %{ "FCMPF $dst,$src1,$src2" %} 9951 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9952 ins_encode( Push_Reg_DPR(src1), 9953 OpcP, RegOpc(src2), 9954 CmpF_Result(dst)); 9955 ins_pipe( pipe_slow ); 9956 %} 9957 9958 // float compare and set condition codes in EFLAGS by XMM regs 9959 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 9960 predicate(UseSSE>=1); 9961 match(Set cr (CmpF src1 src2)); 9962 ins_cost(145); 9963 format %{ "UCOMISS $src1,$src2\n\t" 9964 "JNP,s exit\n\t" 9965 "PUSHF\t# saw NaN, set CF\n\t" 9966 "AND [rsp], #0xffffff2b\n\t" 9967 "POPF\n" 9968 "exit:" %} 9969 ins_encode %{ 9970 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 9971 emit_cmpfp_fixup(_masm); 9972 %} 9973 ins_pipe( pipe_slow ); 9974 %} 9975 9976 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 9977 predicate(UseSSE>=1); 9978 match(Set cr (CmpF src1 src2)); 9979 ins_cost(100); 9980 format %{ "UCOMISS $src1,$src2" %} 9981 ins_encode %{ 9982 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 9983 %} 9984 ins_pipe( pipe_slow ); 9985 %} 9986 9987 // float compare and set condition codes in EFLAGS by XMM regs 9988 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 9989 predicate(UseSSE>=1); 9990 match(Set cr (CmpF src1 (LoadF src2))); 9991 ins_cost(165); 9992 format %{ "UCOMISS $src1,$src2\n\t" 9993 "JNP,s exit\n\t" 9994 "PUSHF\t# saw NaN, set CF\n\t" 9995 "AND [rsp], #0xffffff2b\n\t" 9996 "POPF\n" 9997 "exit:" %} 9998 ins_encode %{ 9999 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10000 emit_cmpfp_fixup(_masm); 10001 %} 10002 ins_pipe( pipe_slow ); 10003 %} 10004 10005 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 
10006 predicate(UseSSE>=1); 10007 match(Set cr (CmpF src1 (LoadF src2))); 10008 ins_cost(100); 10009 format %{ "UCOMISS $src1,$src2" %} 10010 ins_encode %{ 10011 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10012 %} 10013 ins_pipe( pipe_slow ); 10014 %} 10015 10016 // Compare into -1,0,1 in XMM 10017 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10018 predicate(UseSSE>=1); 10019 match(Set dst (CmpF3 src1 src2)); 10020 effect(KILL cr); 10021 ins_cost(255); 10022 format %{ "UCOMISS $src1, $src2\n\t" 10023 "MOV $dst, #-1\n\t" 10024 "JP,s done\n\t" 10025 "JB,s done\n\t" 10026 "SETNE $dst\n\t" 10027 "MOVZB $dst, $dst\n" 10028 "done:" %} 10029 ins_encode %{ 10030 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10031 emit_cmpfp3(_masm, $dst$$Register); 10032 %} 10033 ins_pipe( pipe_slow ); 10034 %} 10035 10036 // Compare into -1,0,1 in XMM and memory 10037 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10038 predicate(UseSSE>=1); 10039 match(Set dst (CmpF3 src1 (LoadF src2))); 10040 effect(KILL cr); 10041 ins_cost(275); 10042 format %{ "UCOMISS $src1, $src2\n\t" 10043 "MOV $dst, #-1\n\t" 10044 "JP,s done\n\t" 10045 "JB,s done\n\t" 10046 "SETNE $dst\n\t" 10047 "MOVZB $dst, $dst\n" 10048 "done:" %} 10049 ins_encode %{ 10050 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10051 emit_cmpfp3(_masm, $dst$$Register); 10052 %} 10053 ins_pipe( pipe_slow ); 10054 %} 10055 10056 // Spill to obtain 24-bit precision 10057 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10058 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10059 match(Set dst (SubF src1 src2)); 10060 10061 format %{ "FSUB $dst,$src1 - $src2" %} 10062 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10063 ins_encode( Push_Reg_FPR(src1), 10064 OpcReg_FPR(src2), 10065 Pop_Mem_FPR(dst) ); 10066 ins_pipe( fpu_mem_reg_reg ); 10067 %} 10068 // 10069 // This instruction does not round to 24-bits 10070 instruct 
subFPR_reg(regFPR dst, regFPR src) %{ 10071 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10072 match(Set dst (SubF dst src)); 10073 10074 format %{ "FSUB $dst,$src" %} 10075 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10076 ins_encode( Push_Reg_FPR(src), 10077 OpcP, RegOpc(dst) ); 10078 ins_pipe( fpu_reg_reg ); 10079 %} 10080 10081 // Spill to obtain 24-bit precision 10082 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10083 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10084 match(Set dst (AddF src1 src2)); 10085 10086 format %{ "FADD $dst,$src1,$src2" %} 10087 opcode(0xD8, 0x0); /* D8 C0+i */ 10088 ins_encode( Push_Reg_FPR(src2), 10089 OpcReg_FPR(src1), 10090 Pop_Mem_FPR(dst) ); 10091 ins_pipe( fpu_mem_reg_reg ); 10092 %} 10093 // 10094 // This instruction does not round to 24-bits 10095 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10096 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10097 match(Set dst (AddF dst src)); 10098 10099 format %{ "FLD $src\n\t" 10100 "FADDp $dst,ST" %} 10101 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10102 ins_encode( Push_Reg_FPR(src), 10103 OpcP, RegOpc(dst) ); 10104 ins_pipe( fpu_reg_reg ); 10105 %} 10106 10107 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10108 predicate(UseSSE==0); 10109 match(Set dst (AbsF src)); 10110 ins_cost(100); 10111 format %{ "FABS" %} 10112 opcode(0xE1, 0xD9); 10113 ins_encode( OpcS, OpcP ); 10114 ins_pipe( fpu_reg_reg ); 10115 %} 10116 10117 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10118 predicate(UseSSE==0); 10119 match(Set dst (NegF src)); 10120 ins_cost(100); 10121 format %{ "FCHS" %} 10122 opcode(0xE0, 0xD9); 10123 ins_encode( OpcS, OpcP ); 10124 ins_pipe( fpu_reg_reg ); 10125 %} 10126 10127 // Cisc-alternate to addFPR_reg 10128 // Spill to obtain 24-bit precision 10129 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10130 predicate(UseSSE==0 && 
Compile::current()->select_24_bit_instr()); 10131 match(Set dst (AddF src1 (LoadF src2))); 10132 10133 format %{ "FLD $src2\n\t" 10134 "FADD ST,$src1\n\t" 10135 "FSTP_S $dst" %} 10136 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10137 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10138 OpcReg_FPR(src1), 10139 Pop_Mem_FPR(dst) ); 10140 ins_pipe( fpu_mem_reg_mem ); 10141 %} 10142 // 10143 // Cisc-alternate to addFPR_reg 10144 // This instruction does not round to 24-bits 10145 instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10146 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10147 match(Set dst (AddF dst (LoadF src))); 10148 10149 format %{ "FADD $dst,$src" %} 10150 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10151 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10152 OpcP, RegOpc(dst) ); 10153 ins_pipe( fpu_reg_mem ); 10154 %} 10155 10156 // // Following two instructions for _222_mpegaudio 10157 // Spill to obtain 24-bit precision 10158 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10159 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10160 match(Set dst (AddF src1 src2)); 10161 10162 format %{ "FADD $dst,$src1,$src2" %} 10163 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10164 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10165 OpcReg_FPR(src2), 10166 Pop_Mem_FPR(dst) ); 10167 ins_pipe( fpu_mem_reg_mem ); 10168 %} 10169 10170 // Cisc-spill variant 10171 // Spill to obtain 24-bit precision 10172 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10173 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10174 match(Set dst (AddF src1 (LoadF src2))); 10175 10176 format %{ "FADD $dst,$src1,$src2 cisc" %} 10177 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10178 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10179 set_instruction_start, 10180 OpcP, RMopc_Mem(secondary,src1), 10181 
Pop_Mem_FPR(dst) ); 10182 ins_pipe( fpu_mem_mem_mem ); 10183 %} 10184 10185 // Spill to obtain 24-bit precision 10186 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10187 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10188 match(Set dst (AddF src1 src2)); 10189 10190 format %{ "FADD $dst,$src1,$src2" %} 10191 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10192 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10193 set_instruction_start, 10194 OpcP, RMopc_Mem(secondary,src1), 10195 Pop_Mem_FPR(dst) ); 10196 ins_pipe( fpu_mem_mem_mem ); 10197 %} 10198 10199 10200 // Spill to obtain 24-bit precision 10201 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10202 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10203 match(Set dst (AddF src con)); 10204 format %{ "FLD $src\n\t" 10205 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10206 "FSTP_S $dst" %} 10207 ins_encode %{ 10208 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10209 __ fadd_s($constantaddress($con)); 10210 __ fstp_s(Address(rsp, $dst$$disp)); 10211 %} 10212 ins_pipe(fpu_mem_reg_con); 10213 %} 10214 // 10215 // This instruction does not round to 24-bits 10216 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10217 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10218 match(Set dst (AddF src con)); 10219 format %{ "FLD $src\n\t" 10220 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10221 "FSTP $dst" %} 10222 ins_encode %{ 10223 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10224 __ fadd_s($constantaddress($con)); 10225 __ fstp_d($dst$$reg); 10226 %} 10227 ins_pipe(fpu_reg_reg_con); 10228 %} 10229 10230 // Spill to obtain 24-bit precision 10231 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10232 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10233 match(Set dst (MulF src1 src2)); 10234 10235 format %{ "FLD 
$src1\n\t" 10236 "FMUL $src2\n\t" 10237 "FSTP_S $dst" %} 10238 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10239 ins_encode( Push_Reg_FPR(src1), 10240 OpcReg_FPR(src2), 10241 Pop_Mem_FPR(dst) ); 10242 ins_pipe( fpu_mem_reg_reg ); 10243 %} 10244 // 10245 // This instruction does not round to 24-bits 10246 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10247 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10248 match(Set dst (MulF src1 src2)); 10249 10250 format %{ "FLD $src1\n\t" 10251 "FMUL $src2\n\t" 10252 "FSTP_S $dst" %} 10253 opcode(0xD8, 0x1); /* D8 C8+i */ 10254 ins_encode( Push_Reg_FPR(src2), 10255 OpcReg_FPR(src1), 10256 Pop_Reg_FPR(dst) ); 10257 ins_pipe( fpu_reg_reg_reg ); 10258 %} 10259 10260 10261 // Spill to obtain 24-bit precision 10262 // Cisc-alternate to reg-reg multiply 10263 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10264 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10265 match(Set dst (MulF src1 (LoadF src2))); 10266 10267 format %{ "FLD_S $src2\n\t" 10268 "FMUL $src1\n\t" 10269 "FSTP_S $dst" %} 10270 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10271 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10272 OpcReg_FPR(src1), 10273 Pop_Mem_FPR(dst) ); 10274 ins_pipe( fpu_mem_reg_mem ); 10275 %} 10276 // 10277 // This instruction does not round to 24-bits 10278 // Cisc-alternate to reg-reg multiply 10279 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10280 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10281 match(Set dst (MulF src1 (LoadF src2))); 10282 10283 format %{ "FMUL $dst,$src1,$src2" %} 10284 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10285 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10286 OpcReg_FPR(src1), 10287 Pop_Reg_FPR(dst) ); 10288 ins_pipe( fpu_reg_reg_mem ); 10289 %} 10290 10291 // Spill to obtain 24-bit precision 10292 instruct 
mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10293 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10294 match(Set dst (MulF src1 src2)); 10295 10296 format %{ "FMUL $dst,$src1,$src2" %} 10297 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10298 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10299 set_instruction_start, 10300 OpcP, RMopc_Mem(secondary,src1), 10301 Pop_Mem_FPR(dst) ); 10302 ins_pipe( fpu_mem_mem_mem ); 10303 %} 10304 10305 // Spill to obtain 24-bit precision 10306 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10307 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10308 match(Set dst (MulF src con)); 10309 10310 format %{ "FLD $src\n\t" 10311 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10312 "FSTP_S $dst" %} 10313 ins_encode %{ 10314 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10315 __ fmul_s($constantaddress($con)); 10316 __ fstp_s(Address(rsp, $dst$$disp)); 10317 %} 10318 ins_pipe(fpu_mem_reg_con); 10319 %} 10320 // 10321 // This instruction does not round to 24-bits 10322 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10323 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10324 match(Set dst (MulF src con)); 10325 10326 format %{ "FLD $src\n\t" 10327 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10328 "FSTP $dst" %} 10329 ins_encode %{ 10330 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10331 __ fmul_s($constantaddress($con)); 10332 __ fstp_d($dst$$reg); 10333 %} 10334 ins_pipe(fpu_reg_reg_con); 10335 %} 10336 10337 10338 // 10339 // MACRO1 -- subsume unshared load into mulFPR 10340 // This instruction does not round to 24-bits 10341 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10342 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10343 match(Set dst (MulF (LoadF mem1) src)); 10344 10345 format %{ "FLD $mem1 ===MACRO1===\n\t" 10346 "FMUL 
ST,$src\n\t" 10347 "FSTP $dst" %} 10348 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10349 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10350 OpcReg_FPR(src), 10351 Pop_Reg_FPR(dst) ); 10352 ins_pipe( fpu_reg_reg_mem ); 10353 %} 10354 // 10355 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10356 // This instruction does not round to 24-bits 10357 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10358 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10359 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10360 ins_cost(95); 10361 10362 format %{ "FLD $mem1 ===MACRO2===\n\t" 10363 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10364 "FADD ST,$src2\n\t" 10365 "FSTP $dst" %} 10366 opcode(0xD9); /* LoadF D9 /0 */ 10367 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10368 FMul_ST_reg(src1), 10369 FAdd_ST_reg(src2), 10370 Pop_Reg_FPR(dst) ); 10371 ins_pipe( fpu_reg_mem_reg_reg ); 10372 %} 10373 10374 // MACRO3 -- addFPR a mulFPR 10375 // This instruction does not round to 24-bits. It is a '2-address' 10376 // instruction in that the result goes back to src2. This eliminates 10377 // a move from the macro; possibly the register allocator will have 10378 // to add it back (and maybe not). 
// MACRO3 (see comment above): fused AddF-of-MulF. 2-address form: the sum is
// accumulated back into src2 via FADDP, avoiding an explicit result move.
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
// Fused (src2 - src1) / src3 on the x87 stack; x87-only (UseSSE==0).
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
// Float divide; result popped to a stack slot to force single-precision rounding.
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// 2-address float divide, dst /= src, result stays in an x87 register.
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
// Float remainder (ModF) on the x87 stack (UseSSE==0); the result is spilled
// to a stack slot to force rounding to single precision.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
// 2-address form: dst = dst % src, result left in an x87 register.
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// ModF with operands in XMM registers (UseSSE>=1): x86 SSE has no remainder
// instruction, so the values are bounced through the stack to the x87 unit,
// FPREM is looped until it reports completion (FNSTSW/SAHF/JP), and the
// result is moved back to XMM; the leftover x87 stack entry is then popped.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!

// Round an x87 value to single precision by storing it to a stack slot.
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round an x87 value to double precision by storing it to a stack slot.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
// ConvD2F on the x87 stack: expands to roundFloat_mem_reg's spill/store.
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// ConvD2F from x87 to XMM (UseSSE==1): store single through the stack, then
// MOVSS into the XMM destination. FPR1L gets FST directly; other x87 slots
// are first loaded to the top of stack.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ConvF2D on the x87 stack: widening is a plain register-to-register pop.
// NOTE(review): the format text "FST_S ... # D-round" looks inherited from the
// F-round variants; it is display-only (the encode is Pop_Reg_Reg_DPR).
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// ConvF2D from x87 to a stack slot (UseSSE==1): expands to roundDouble_mem_reg.
instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// ConvF2D from XMM to x87 (UseSSE==1): bounce the single through the stack,
// load it on the x87 stack, and pop it into the double-precision destination.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

// ConvF2D entirely in XMM registers (UseSSE>=2).
instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// ConvD2I on the x87 stack (UseSSE<=1): switch the FPU control word to
// truncating rounding, FIST the value, then restore the control word. A
// 0x80000000 result flags overflow/NaN and falls back to the d2i_wrapper stub.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// SSE2 variant: CVTTSD2SI truncates directly; the 0x80000000 sentinel routes
// overflow/NaN to the d2i_wrapper slow path via the x87 stack.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// ConvD2L on the x87 stack (UseSSE<=1): 64-bit FISTp in truncating mode; the
// sentinel value 0x8000000000000000 (hi=0x80000000, lo=0) selects d2l_wrapper.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
// ConvF2I from XMM (UseSSE>=1): CVTTSS2SI truncates directly; the 0x80000000
// sentinel sends overflow/NaN through the x87 stack to the d2i_wrapper stub
// (the same wrapper serves both float and double inputs).
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// ConvF2L on the x87 stack (UseSSE==0): 64-bit FISTp in truncating mode; the
// long sentinel (hi=0x80000000, lo=0) selects the d2l_wrapper slow path.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
// ConvF2L from XMM (UseSSE>=1): no SSE float->long exists, so the value is
// bounced through the stack to the x87 unit and truncated with FISTp; the
// long sentinel (hi=0x80000000, lo=0) selects the d2l_wrapper slow path.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// ConvI2D from a stack slot to x87 (UseSSE<=1): FILD then pop to the register.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// ConvI2D in XMM via CVTSI2SD (UseSSE>=2, scalar-convert flavor).
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ConvI2D with the int loaded directly from memory (cisc form).
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// ConvI2D via MOVD + CVTDQ2PD (UseXmmI2D flavor).
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// ConvI2D with the int FILDed straight from memory (x87, non-24-bit mode).
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
10911 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ 10912 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); 10913 match(Set dst (ConvI2F src)); 10914 format %{ "FILD $src\n\t" 10915 "FSTP $dst" %} 10916 10917 opcode(0xDB, 0x0); /* DB /0 */ 10918 ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); 10919 ins_pipe( fpu_reg_mem ); 10920 %} 10921 10922 // In 24-bit mode, force exponent rounding by storing back out 10923 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ 10924 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10925 match(Set dst (ConvI2F src)); 10926 ins_cost(200); 10927 format %{ "FILD $src\n\t" 10928 "FSTP_S $dst" %} 10929 opcode(0xDB, 0x0); /* DB /0 */ 10930 ins_encode( Push_Mem_I(src), 10931 Pop_Mem_FPR(dst)); 10932 ins_pipe( fpu_mem_mem ); 10933 %} 10934 10935 // In 24-bit mode, force exponent rounding by storing back out 10936 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ 10937 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10938 match(Set dst (ConvI2F (LoadI mem))); 10939 ins_cost(200); 10940 format %{ "FILD $mem\n\t" 10941 "FSTP_S $dst" %} 10942 opcode(0xDB); /* DB /0 */ 10943 ins_encode( OpcP, RMopc_Mem(0x00,mem), 10944 Pop_Mem_FPR(dst)); 10945 ins_pipe( fpu_mem_mem ); 10946 %} 10947 10948 // This instruction does not round to 24-bits 10949 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ 10950 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10951 match(Set dst (ConvI2F src)); 10952 format %{ "FILD $src\n\t" 10953 "FSTP $dst" %} 10954 opcode(0xDB, 0x0); /* DB /0 */ 10955 ins_encode( Push_Mem_I(src), 10956 Pop_Reg_FPR(dst)); 10957 ins_pipe( fpu_reg_mem ); 10958 %} 10959 10960 // This instruction does not round to 24-bits 10961 instruct convI2FPR_mem(regFPR dst, memory mem) %{ 10962 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10963 match(Set dst (ConvI2F (LoadI 
mem))); 10964 format %{ "FILD $mem\n\t" 10965 "FSTP $dst" %} 10966 opcode(0xDB); /* DB /0 */ 10967 ins_encode( OpcP, RMopc_Mem(0x00,mem), 10968 Pop_Reg_FPR(dst)); 10969 ins_pipe( fpu_reg_mem ); 10970 %} 10971 10972 // Convert an int to a float in xmm; no rounding step needed. 10973 instruct convI2F_reg(regF dst, rRegI src) %{ 10974 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); 10975 match(Set dst (ConvI2F src)); 10976 format %{ "CVTSI2SS $dst, $src" %} 10977 ins_encode %{ 10978 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); 10979 %} 10980 ins_pipe( pipe_slow ); 10981 %} 10982 10983 instruct convXI2F_reg(regF dst, rRegI src) 10984 %{ 10985 predicate( UseSSE>=2 && UseXmmI2F ); 10986 match(Set dst (ConvI2F src)); 10987 10988 format %{ "MOVD $dst,$src\n\t" 10989 "CVTDQ2PS $dst,$dst\t# i2f" %} 10990 ins_encode %{ 10991 __ movdl($dst$$XMMRegister, $src$$Register); 10992 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); 10993 %} 10994 ins_pipe(pipe_slow); // XXX 10995 %} 10996 10997 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{ 10998 match(Set dst (ConvI2L src)); 10999 effect(KILL cr); 11000 ins_cost(375); 11001 format %{ "MOV $dst.lo,$src\n\t" 11002 "MOV $dst.hi,$src\n\t" 11003 "SAR $dst.hi,31" %} 11004 ins_encode(convert_int_long(dst,src)); 11005 ins_pipe( ialu_reg_reg_long ); 11006 %} 11007 11008 // Zero-extend convert int to long 11009 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{ 11010 match(Set dst (AndL (ConvI2L src) mask) ); 11011 effect( KILL flags ); 11012 ins_cost(250); 11013 format %{ "MOV $dst.lo,$src\n\t" 11014 "XOR $dst.hi,$dst.hi" %} 11015 opcode(0x33); // XOR 11016 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11017 ins_pipe( ialu_reg_reg_long ); 11018 %} 11019 11020 // Zero-extend long 11021 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{ 11022 match(Set dst (AndL src mask) ); 11023 effect( KILL flags ); 11024 ins_cost(250); 11025 format %{ "MOV 
$dst.lo,$src.lo\n\t" 11026 "XOR $dst.hi,$dst.hi\n\t" %} 11027 opcode(0x33); // XOR 11028 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11029 ins_pipe( ialu_reg_reg_long ); 11030 %} 11031 11032 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ 11033 predicate (UseSSE<=1); 11034 match(Set dst (ConvL2D src)); 11035 effect( KILL cr ); 11036 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11037 "PUSH $src.lo\n\t" 11038 "FILD ST,[ESP + #0]\n\t" 11039 "ADD ESP,8\n\t" 11040 "FSTP_D $dst\t# D-round" %} 11041 opcode(0xDF, 0x5); /* DF /5 */ 11042 ins_encode(convert_long_double(src), Pop_Mem_DPR(dst)); 11043 ins_pipe( pipe_slow ); 11044 %} 11045 11046 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{ 11047 predicate (UseSSE>=2); 11048 match(Set dst (ConvL2D src)); 11049 effect( KILL cr ); 11050 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11051 "PUSH $src.lo\n\t" 11052 "FILD_D [ESP]\n\t" 11053 "FSTP_D [ESP]\n\t" 11054 "MOVSD $dst,[ESP]\n\t" 11055 "ADD ESP,8" %} 11056 opcode(0xDF, 0x5); /* DF /5 */ 11057 ins_encode(convert_long_double2(src), Push_ResultD(dst)); 11058 ins_pipe( pipe_slow ); 11059 %} 11060 11061 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{ 11062 predicate (UseSSE>=1); 11063 match(Set dst (ConvL2F src)); 11064 effect( KILL cr ); 11065 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11066 "PUSH $src.lo\n\t" 11067 "FILD_D [ESP]\n\t" 11068 "FSTP_S [ESP]\n\t" 11069 "MOVSS $dst,[ESP]\n\t" 11070 "ADD ESP,8" %} 11071 opcode(0xDF, 0x5); /* DF /5 */ 11072 ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8)); 11073 ins_pipe( pipe_slow ); 11074 %} 11075 11076 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ 11077 match(Set dst (ConvL2F src)); 11078 effect( KILL cr ); 11079 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11080 "PUSH $src.lo\n\t" 11081 "FILD ST,[ESP + #0]\n\t" 11082 "ADD ESP,8\n\t" 11083 "FSTP_S $dst\t# F-round" %} 11084 opcode(0xDF, 
              0x5);   /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Truncate long to int: simply copy the low 32-bit half of the register pair.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Reinterpret float bits as int: a plain 32-bit load from the stack slot.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// x87 variant (UseSSE==0): store the FPU register to the int stack slot.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// SSE variant: MOVSS the XMM register to the int stack slot.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant: MOVD moves the bits register-to-register, no stack round trip.
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret int bits as float: a plain 32-bit store to the float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// x87 variant (UseSSE==0): load the int stack slot as a 32-bit real.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret double bits as long: two 32-bit loads fill the register pair.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// x87 variant (UseSSE<=1): store the FPU register to the long stack slot.
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 register-to-register D2L: low half via MOVD, then shuffle the upper
// 32 bits of the XMM value down into $tmp so a second MOVD can extract them.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret long bits as double: two 32-bit stores to the double stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// x87 variant (UseSSE<=1): load the long stack slot as a 64-bit real.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// MOVSD form, used when clearing the upper half of the XMM register is cheap.
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// MOVLPD form, selected when !UseXmmLoadAndClearUpper (partial register write).
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 register-to-register L2D: MOVD each 32-bit half into an XMM register,
// then interleave them with PUNPCKLDQ to form the 64-bit value in $dst.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
// Small-array variant: inlines a store loop, falling back to REP STOS[B]
// for the large case (see clear_mem with is_large == false).
instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else {
       $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// Large-array variant: goes straight to REP STOS[B] (is_large == true).
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    if (UseFastStosb) {
       $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else {
       $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, both operands Latin-1 (byte[]) encoded.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, both operands UTF-16 (char[]) encoded.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, first operand Latin-1, second UTF-16.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, first operand UTF-16, second Latin-1. Note the operands are
// swapped in the stub call (str2/str1, cnt2/cnt1) and distinct register
// bindings (ESI/EDX vs EDI/ECX) are used compared to the other variants.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}

  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf with a runtime substring length (cnt2 passed as -1 to the stub),
// Latin-1 encoding.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// As above, UTF-16 encoding.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// As above, mixed UTF-16/Latin-1 encoding.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf of a single char within a char[].
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// Test a byte[] for any byte with the sign bit set.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST reg,reg (shorter than CMP with immediate 0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & con) compared against zero, without materializing the AND result.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
11869 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{ 11870 match(Set cr (CmpP (LoadP op) zero)); 11871 11872 format %{ "TEST $op,0xFFFFFFFF" %} 11873 ins_cost(500); 11874 opcode(0xF7); /* Opcode F7 /0 */ 11875 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); 11876 ins_pipe( ialu_cr_reg_imm ); 11877 %} 11878 11879 // Yanked all unsigned pointer compare operations. 11880 // Pointer compares are done with CmpP which is already unsigned. 11881 11882 //----------Max and Min-------------------------------------------------------- 11883 // Min Instructions 11884 //// 11885 // *** Min and Max using the conditional move are slower than the 11886 // *** branch version on a Pentium III. 11887 // // Conditional move for min 11888 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11889 // effect( USE_DEF op2, USE op1, USE cr ); 11890 // format %{ "CMOVlt $op2,$op1\t! min" %} 11891 // opcode(0x4C,0x0F); 11892 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11893 // ins_pipe( pipe_cmov_reg ); 11894 //%} 11895 // 11896 //// Min Register with Register (P6 version) 11897 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11898 // predicate(VM_Version::supports_cmov() ); 11899 // match(Set op2 (MinI op1 op2)); 11900 // ins_cost(200); 11901 // expand %{ 11902 // eFlagsReg cr; 11903 // compI_eReg(cr,op1,op2); 11904 // cmovI_reg_lt(op2,op1,cr); 11905 // %} 11906 //%} 11907 11908 // Min Register with Register (generic version) 11909 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11910 match(Set dst (MinI dst src)); 11911 effect(KILL flags); 11912 ins_cost(300); 11913 11914 format %{ "MIN $dst,$src" %} 11915 opcode(0xCC); 11916 ins_encode( min_enc(dst,src) ); 11917 ins_pipe( pipe_slow ); 11918 %} 11919 11920 // Max Register with Register 11921 // *** Min and Max using the conditional move are slower than the 11922 // *** branch version on a Pentium III. 
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a
// relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// mask version
// Jump Direct Conditional - Label defines a relative address
// from Jcc+1
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Unordered-or-equal style compare: must also test the parity flag, so the
// notEqual case emits two taken-if jumps and the equal case skips over the
// parity (unordered) outcome with a short branch.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant used when only the flags result is consumed (compared against
// NULL); the XOR of EDI is skipped since no register result is needed.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short-branch form of the unordered-compare conditional jump: both emitted
// branches are jccb, so the fixed size is 4 bytes (2 x 2-byte Jcc rel8).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // High halves decide unless they are equal ...
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less,    m_one);
    __ jccb(Assembler::greater, p_one);
    // ... in which case the low halves are compared unsigned.
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): parentheses added around the BoolTest disjunction below; the
// original relied on && binding tighter than ||, which made the predicate
// true for a 'ge' test regardless of UseSSE, unlike the parenthesized
// sibling cmov rules above. Intended meaning: UseSSE<=1 && (lt || ge).
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): parentheses added around the BoolTest disjunction below; the
// original relied on && binding tighter than ||, which made the predicate
// true for an 'ne' test regardless of UseSSE, unlike the parenthesized
// sibling cmov rules above. Intended meaning: UseSSE<=1 && (eq || ne).
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Conditional move of an int register, selected on a long LE/GT compare.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above with the int source loaded from memory (slightly higher cost).
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Conditional move of a pointer register, selected on a long LE/GT compare.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 form, UseSSE<=1).
// FIX: parenthesize the BoolTest disjunction.  '&&' binds tighter than '||',
// so the old form parsed as (UseSSE<=1 && le) || gt, dropping the UseSSE
// guard for the GT case.  The sibling supports_cmov() rules above already
// use the parenthesized form; this makes the four UseSSE-guarded rules
// below match that clear intent (the operand classes' own UseSSE
// predicates previously masked the slip).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form; predicate parenthesized, see above).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, UseSSE==0; predicate parenthesized, see above).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE form, UseSSE>=1; predicate parenthesized, see above).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // EAX is preloaded with a placeholder oop (-1) ahead of the call
  // (presumably rewritten by the inline-cache machinery -- confirm).
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call with no FP work: skips the float-stack cleanup the other
// runtime calls perform.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
// The return address is popped into EDX as a dummy before the indirect jump;
// the exception oop travels in EAX (eAXRegP operand).
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// RTM variant: selected only when the compilation uses Restricted
// Transactional Memory (use_rtm()); needs two extra temps (cx1, cx2)
// and passes the RTM/stack-RTM counters plus the method's MDO and
// profile_rtm flag down to MacroAssembler::fast_lock.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM variant: same FastLock match, fewer temps; the RTM-only
// arguments of fast_lock are passed as noreg/NULL/false.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

// Single unlock rule for both modes; the use_rtm() decision is passed
// through to MacroAssembler::fast_unlock at runtime.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or. By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$. Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic. It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active rule: a loadI (instr 0) preceded by a storeI (instr 1) of the same
// register to the same memory slot is redundant -- replace the pair with
// just the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.