1 // 2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // archtecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 
//
// SOC = Save-On-Call: The register allocator assumes that these registers
//                     can be used without saving upon entry to the method,
//                     but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS = Always-Save: The register allocator assumes that these registers
//                   must be saved before using them upon entry to the
//                   method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

// Encoding column is the x86 register number (EAX=0 ... EDI=7).
reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
// Each x87 register is described as an L (low) / H (high) 32-bit half so a
// double can occupy a register pair with matching encodings.
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
// (register masks lay out the high half two slots above the low half).
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
//
// Writes the pair (lo, hi) into a 16-byte-aligned slot derived from 'adr'
// and returns the aligned address; callers pass an address with 8 bytes of
// slack so the aligned slot is always inside the pool.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call for FPU-mode /
// vector-state resets: 6 for the fldcw in 24-bit-fp mode, 3 for vzeroupper
// when wide vectors are in use. Must match the code actually emitted.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size of the FFree-Float-Stack-All stub; recorded when the stub is emitted,
// -1 until then.
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1; // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5; // skip MOV instruction
  current_offset += 1; // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M byte assembled from mod (f1), reg (f2), r/m (f3) fields.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a condition code into an opcode base.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
                    int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
                    int format) {
#ifdef ASSERT
  // Embedded oops must not be scavengable unless ScavengeRootsInCode allows it.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + ModR/M + SIB + 8- or 32-bit displacement for [ESP+disp].
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d8 (cbuf, disp); // Displacement // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d32(cbuf, disp); // Displacement // R/M byte
  }
}

// rRegI ereg, memory mem) %{ // emit_reg_mem
// Emit the ModR/M (+ optional SIB and displacement) bytes addressing
// [base + index*scale + displace]; index==0x4 means "no index".
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else { // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else { // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) { // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}


// Emit a 32-bit register-to-register MOV (opcode 0x8B); a self-move emits
// nothing.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Patch EFLAGS after a comiss/ucomiss so that a NaN operand reads as
// 'less than' (see the bit layout below).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  // 7 6 5 4 3 2 1 0
  // |S|Z|r|A|r|P|r|C| (r - reserved bit)
  // 0 0 1 0 1 0 1 1 (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst': -1 for less (or
// unordered), 0 for equal, 1 for greater, based on the current flags.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0; // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog; must mirror the code emitted by
// MachPrologNode::emit / verified_entry.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog; must mirror MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    MacroAssembler masm(&cbuf);
    masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  // popl EBP (0x58 | register encoding)
  emit_opcode(cbuf, 0x58 | EBP_enc);

  if (do_polling() && C->is_method_compilation()) {
    // Safepoint poll: TEST EAX, [polling page] with a poll_return relocation.
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

// Byte size of the epilog; the per-piece sizes here must stay in sync with
// the bytes emitted by MachEpilogNode::emit above.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Coarse classification of an allocator register for spill-copy decisions.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (cbuf != NULL), format (cbuf == NULL, !do_size), or size a
// reg<->[ESP+offset] move; returns the accumulated byte size.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
// XMM <-> stack spill helper.  A register pair (reg_lo+1 == reg_hi) is a
// 64-bit double (movdbl), otherwise a 32-bit float (movflt).  Size math
// accounts for the EVEX prefix and compressed displacements when UseAVX > 2.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                              Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                              Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    // EVEX can compress the displacement to one byte if it is a multiple of
    // the operand size; ask the assembler.
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM -> XMM register move.  Emits movdbl for a pair (double), movflt for a
// single float; the !PRODUCT branch prints the mnemonic that corresponds to
// the UseXmmRegToRegMoveAll setting (MOVAPS/MOVAPD vs. MOVSS/MOVSD).
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// GPR -> XMM move via MOVD.  src_hi/dst_hi are unused: this is a 32-bit move
// (the caller asserts there is no second word).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// XMM -> GPR move via MOVD.  src_hi/dst_hi are unused: 32-bit move only.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// GPR -> GPR move: 0x8B /r (MOV r32,r/m32), always 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 register to [ESP+offset].  If the source is not already at the
// top of the FP stack it is FLDed first and stored with a popping store.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // FSTP is /3 and FST is /2 in the ModRM reg field; the GPR numbers for EBX
  // (encoding 3) and EDX (encoding 2) are reused to pass those bits through
  // impl_helper's register slot.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
952 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 953 int src_hi, int dst_hi, uint ireg, outputStream* st); 954 955 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 956 int stack_offset, int reg, uint ireg, outputStream* st); 957 958 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 959 int dst_offset, uint ireg, outputStream* st) { 960 int calc_size = 0; 961 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 962 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 963 switch (ireg) { 964 case Op_VecS: 965 calc_size = 3+src_offset_size + 3+dst_offset_size; 966 break; 967 case Op_VecD: 968 calc_size = 3+src_offset_size + 3+dst_offset_size; 969 src_offset += 4; 970 dst_offset += 4; 971 src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 972 dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 973 calc_size += 3+src_offset_size + 3+dst_offset_size; 974 break; 975 case Op_VecX: 976 case Op_VecY: 977 case Op_VecZ: 978 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 979 break; 980 default: 981 ShouldNotReachHere(); 982 } 983 if (cbuf) { 984 MacroAssembler _masm(cbuf); 985 int offset = __ offset(); 986 switch (ireg) { 987 case Op_VecS: 988 __ pushl(Address(rsp, src_offset)); 989 __ popl (Address(rsp, dst_offset)); 990 break; 991 case Op_VecD: 992 __ pushl(Address(rsp, src_offset)); 993 __ popl (Address(rsp, dst_offset)); 994 __ pushl(Address(rsp, src_offset+4)); 995 __ popl (Address(rsp, dst_offset+4)); 996 break; 997 case Op_VecX: 998 __ movdqu(Address(rsp, -16), xmm0); 999 __ movdqu(xmm0, Address(rsp, src_offset)); 1000 __ movdqu(Address(rsp, dst_offset), xmm0); 1001 __ movdqu(xmm0, Address(rsp, -16)); 1002 break; 1003 case Op_VecY: 1004 __ vmovdqu(Address(rsp, -32), xmm0); 1005 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1006 __ vmovdqu(Address(rsp, dst_offset), xmm0); 1007 
__ vmovdqu(xmm0, Address(rsp, -32)); 1008 case Op_VecZ: 1009 __ evmovdqul(Address(rsp, -64), xmm0, 2); 1010 __ evmovdqul(xmm0, Address(rsp, src_offset), 2); 1011 __ evmovdqul(Address(rsp, dst_offset), xmm0, 2); 1012 __ evmovdqul(xmm0, Address(rsp, -64), 2); 1013 break; 1014 default: 1015 ShouldNotReachHere(); 1016 } 1017 int size = __ offset() - offset; 1018 assert(size == calc_size, "incorrect size calculattion"); 1019 return size; 1020 #ifndef PRODUCT 1021 } else if (!do_size) { 1022 switch (ireg) { 1023 case Op_VecS: 1024 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1025 "popl [rsp + #%d]", 1026 src_offset, dst_offset); 1027 break; 1028 case Op_VecD: 1029 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1030 "popq [rsp + #%d]\n\t" 1031 "pushl [rsp + #%d]\n\t" 1032 "popq [rsp + #%d]", 1033 src_offset, dst_offset, src_offset+4, dst_offset+4); 1034 break; 1035 case Op_VecX: 1036 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1037 "movdqu xmm0, [rsp + #%d]\n\t" 1038 "movdqu [rsp + #%d], xmm0\n\t" 1039 "movdqu xmm0, [rsp - #16]", 1040 src_offset, dst_offset); 1041 break; 1042 case Op_VecY: 1043 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1044 "vmovdqu xmm0, [rsp + #%d]\n\t" 1045 "vmovdqu [rsp + #%d], xmm0\n\t" 1046 "vmovdqu xmm0, [rsp - #32]", 1047 src_offset, dst_offset); 1048 case Op_VecZ: 1049 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1050 "vmovdqu xmm0, [rsp + #%d]\n\t" 1051 "vmovdqu [rsp + #%d], xmm0\n\t" 1052 "vmovdqu xmm0, [rsp - #64]", 1053 src_offset, dst_offset); 1054 break; 1055 default: 1056 ShouldNotReachHere(); 1057 } 1058 #endif 1059 } 1060 return calc_size; 1061 } 1062 1063 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1064 // Get registers to move 1065 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1066 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1067 OptoReg::Name 
dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  // Vector spill copies are dispatched wholesale to the vector helpers.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move. push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high word first so the low-word copy does not clobber it.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
        emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
        emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode (*cbuf, 0xDD ); // FST ST(i)
        emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP is 2+2 bytes; the FST-only form is 2.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
      emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // FLD (3 bytes + disp) followed by the 2-byte FSTP.
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Spill the x87 value to the temp slot just created at [ESP].
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    // Release the temp slot, again via LEA so flags stay intact.
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
// Pretty-print mode: run implementation() with no CodeBuffer.
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

// Emit mode: run implementation() into the CodeBuffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

// Size-only mode: no emission, no printing.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// LEA reg,[ESP+offset]: disp32 form when offset >= 128, disp8 form otherwise
// (must agree byte-for-byte with BoxLockNode::size()).
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg,
0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// 7 bytes for the disp32 LEA form, 4 for the disp8 form emitted above.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: compare the receiver's klass (in ECX) against the
// inline-cache klass (in EAX) and jump to the IC-miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Must match the byte count emitted above (checked by the assert in emit()).
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}

// This is UltraSparc specific, true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
// this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 1; }

// No CMOVF/CMOVD with SSE/SSE2
const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Narrow-oop addressing: not applicable here (ShouldNotCallThis).
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

// Narrow-klass addressing: not applicable here (ShouldNotCallThis).
bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Replace the memory operand that triggers an implicit null check with its
// "win95-safe" variant.  Walks the node's operands to find the one covering
// input edge 'idx', then swaps in the matching *_win95_safeOper.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds(); // Virtual call for number of operands
  uint skipped = node->oper_input_base(); // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1; // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Advance until the operand whose edge range contains idx is found.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++; // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// True for (AndL x con) with a 32-bit-mask constant, and for a ConL whose
// upper half is zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams. Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword. There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
// operand to generate a function which returns its register number when
// queried. CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
// MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried. COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding. They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode. Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block. Emit functions will live in the
  // main source block for now. In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (0x66).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // ModRM byte for a register-register form (mod=3).
  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Explicit opcode followed by a register-register ModRM byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32,imm32 with a zero immediate, using the 0xB8+rd short form.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    // normal case special case
    //
    // input : rax,: dividend min_int
    // reg: divisor -1
    //
    // output: rax,: quotient (= rax, idiv reg) min_int
    // rdx: remainder (= rax, irem reg) 0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80 cmp rax,80000000h
    // 0F 85 0B 00 00 00 jne normal_case
    // 33 D2 xor rdx,edx
    // 83 F9 FF cmp rcx,0FFh
    // 0F 84 03 00 00 00 je done
    // normal_case:
    // 99 cdq
    // F7 F9 idiv rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
    // normal_case:
    emit_opcode(cbuf,0x99); // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1712 // Check for 8-bit immediate, and set sign extend bit in opcode 1713 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1714 emit_opcode(cbuf, $primary | 0x02); } 1715 else { // If 32-bit immediate 1716 emit_opcode(cbuf, $primary); 1717 } 1718 // Emit r/m byte with secondary opcode, after primary opcode. 1719 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1720 %} 1721 1722 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1723 // Check for 8-bit immediate, and set sign extend bit in opcode 1724 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1725 $$$emit8$imm$$constant; 1726 } 1727 else { // If 32-bit immediate 1728 // Output immediate 1729 $$$emit32$imm$$constant; 1730 } 1731 %} 1732 1733 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1734 // Emit primary opcode and set sign-extend bit 1735 // Check for 8-bit immediate, and set sign extend bit in opcode 1736 int con = (int)$imm$$constant; // Throw away top bits 1737 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1738 // Emit r/m byte with secondary opcode, after primary opcode. 1739 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1740 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1741 else emit_d32(cbuf,con); 1742 %} 1743 1744 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1745 // Emit primary opcode and set sign-extend bit 1746 // Check for 8-bit immediate, and set sign extend bit in opcode 1747 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1748 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1749 // Emit r/m byte with tertiary opcode, after primary opcode. 
1750 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1751 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1752 else emit_d32(cbuf,con); 1753 %} 1754 1755 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1756 emit_cc(cbuf, $secondary, $dst$$reg ); 1757 %} 1758 1759 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1760 int destlo = $dst$$reg; 1761 int desthi = HIGH_FROM_LOW(destlo); 1762 // bswap lo 1763 emit_opcode(cbuf, 0x0F); 1764 emit_cc(cbuf, 0xC8, destlo); 1765 // bswap hi 1766 emit_opcode(cbuf, 0x0F); 1767 emit_cc(cbuf, 0xC8, desthi); 1768 // xchg lo and hi 1769 emit_opcode(cbuf, 0x87); 1770 emit_rm(cbuf, 0x3, destlo, desthi); 1771 %} 1772 1773 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1774 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1775 %} 1776 1777 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1778 $$$emit8$primary; 1779 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1780 %} 1781 1782 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1783 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1784 emit_d8(cbuf, op >> 8 ); 1785 emit_d8(cbuf, op & 255); 1786 %} 1787 1788 // emulate a CMOV with a conditional branch around a MOV 1789 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1790 // Invert sense of branch from sense of CMOV 1791 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1792 emit_d8( cbuf, $brOffs$$constant ); 1793 %} 1794 1795 enc_class enc_PartialSubtypeCheck( ) %{ 1796 Register Redi = as_Register(EDI_enc); // result register 1797 Register Reax = as_Register(EAX_enc); // super class 1798 Register Recx = as_Register(ECX_enc); // killed 1799 Register Resi = as_Register(ESI_enc); // sub class 1800 Label miss; 1801 1802 MacroAssembler _masm(&cbuf); 1803 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1804 NULL, &miss, 1805 /*set_cond_codes:*/ true); 1806 if ($primary) { 1807 __ xorptr(Redi, Redi); 1808 } 1809 __ bind(miss); 1810 %} 1811 1812 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1813 
MacroAssembler masm(&cbuf); 1814 int start = masm.offset(); 1815 if (UseSSE >= 2) { 1816 if (VerifyFPU) { 1817 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1818 } 1819 } else { 1820 // External c_calling_convention expects the FPU stack to be 'clean'. 1821 // Compiled code leaves it dirty. Do cleanup now. 1822 masm.empty_FPU_stack(); 1823 } 1824 if (sizeof_FFree_Float_Stack_All == -1) { 1825 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1826 } else { 1827 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1828 } 1829 %} 1830 1831 enc_class Verify_FPU_For_Leaf %{ 1832 if( VerifyFPU ) { 1833 MacroAssembler masm(&cbuf); 1834 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1835 } 1836 %} 1837 1838 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1839 // This is the instruction starting address for relocation info. 1840 cbuf.set_insts_mark(); 1841 $$$emit8$primary; 1842 // CALL directly to the runtime 1843 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1844 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1845 1846 if (UseSSE >= 2) { 1847 MacroAssembler _masm(&cbuf); 1848 BasicType rt = tf()->return_type(); 1849 1850 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1851 // A C runtime call where the return value is unused. In SSE2+ 1852 // mode the result needs to be removed from the FPU stack. It's 1853 // likely that this function call could be removed by the 1854 // optimizer if the C function is a pure function. 
1855 __ ffree(0); 1856 } else if (rt == T_FLOAT) { 1857 __ lea(rsp, Address(rsp, -4)); 1858 __ fstp_s(Address(rsp, 0)); 1859 __ movflt(xmm0, Address(rsp, 0)); 1860 __ lea(rsp, Address(rsp, 4)); 1861 } else if (rt == T_DOUBLE) { 1862 __ lea(rsp, Address(rsp, -8)); 1863 __ fstp_d(Address(rsp, 0)); 1864 __ movdbl(xmm0, Address(rsp, 0)); 1865 __ lea(rsp, Address(rsp, 8)); 1866 } 1867 } 1868 %} 1869 1870 1871 enc_class pre_call_resets %{ 1872 // If method sets FPU control word restore it here 1873 debug_only(int off0 = cbuf.insts_size()); 1874 if (ra_->C->in_24_bit_fp_mode()) { 1875 MacroAssembler _masm(&cbuf); 1876 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1877 } 1878 if (ra_->C->max_vector_size() > 16) { 1879 // Clear upper bits of YMM registers when current compiled code uses 1880 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1881 MacroAssembler _masm(&cbuf); 1882 __ vzeroupper(); 1883 } 1884 debug_only(int off1 = cbuf.insts_size()); 1885 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1886 %} 1887 1888 enc_class post_call_FPU %{ 1889 // If method sets FPU control word do it here also 1890 if (Compile::current()->in_24_bit_fp_mode()) { 1891 MacroAssembler masm(&cbuf); 1892 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1893 } 1894 %} 1895 1896 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1897 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1898 // who we intended to call. 
1899 cbuf.set_insts_mark(); 1900 $$$emit8$primary; 1901 if (!_method) { 1902 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1903 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1904 } else if (_optimized_virtual) { 1905 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1906 opt_virtual_call_Relocation::spec(), RELOC_IMM32 ); 1907 } else { 1908 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1909 static_call_Relocation::spec(), RELOC_IMM32 ); 1910 } 1911 if (_method) { // Emit stub for static call. 1912 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1913 if (stub == NULL) { 1914 ciEnv::current()->record_failure("CodeCache is full"); 1915 return; 1916 } 1917 } 1918 %} 1919 1920 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1921 MacroAssembler _masm(&cbuf); 1922 __ ic_call((address)$meth$$method); 1923 %} 1924 1925 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1926 int disp = in_bytes(Method::from_compiled_offset()); 1927 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1928 1929 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1930 cbuf.set_insts_mark(); 1931 $$$emit8$primary; 1932 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1933 emit_d8(cbuf, disp); // Displacement 1934 1935 %} 1936 1937 // Following encoding is no longer used, but may be restored if calling 1938 // convention changes significantly. 
1939 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1940 // 1941 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1942 // // int ic_reg = Matcher::inline_cache_reg(); 1943 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1944 // // int imo_reg = Matcher::interpreter_method_oop_reg(); 1945 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1946 // 1947 // // // Interpreter expects method_oop in EBX, currently a callee-saved register, 1948 // // // so we load it immediately before the call 1949 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop 1950 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1951 // 1952 // // xor rbp,ebp 1953 // emit_opcode(cbuf, 0x33); 1954 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1955 // 1956 // // CALL to interpreter. 1957 // cbuf.set_insts_mark(); 1958 // $$$emit8$primary; 1959 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1960 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1961 // %} 1962 1963 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1964 $$$emit8$primary; 1965 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1966 $$$emit8$shift$$constant; 1967 %} 1968 1969 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1970 // Load immediate does not have a zero or sign extended version 1971 // for 8-bit immediates 1972 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1973 $$$emit32$src$$constant; 1974 %} 1975 1976 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1977 // Load immediate does not have a zero or sign extended version 1978 // for 8-bit immediates 1979 emit_opcode(cbuf, $primary + $dst$$reg); 1980 $$$emit32$src$$constant; 1981 %} 1982 1983 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1984 // Load immediate does not have a zero or sign extended version 1985 // for 8-bit immediates 1986 int dst_enc = $dst$$reg; 1987 int src_con = $src$$constant & 0x0FFFFFFFFL; 1988 if (src_con == 0) { 1989 // xor dst, dst 
1990 emit_opcode(cbuf, 0x33); 1991 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1992 } else { 1993 emit_opcode(cbuf, $primary + dst_enc); 1994 emit_d32(cbuf, src_con); 1995 } 1996 %} 1997 1998 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 1999 // Load immediate does not have a zero or sign extended version 2000 // for 8-bit immediates 2001 int dst_enc = $dst$$reg + 2; 2002 int src_con = ((julong)($src$$constant)) >> 32; 2003 if (src_con == 0) { 2004 // xor dst, dst 2005 emit_opcode(cbuf, 0x33); 2006 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 2007 } else { 2008 emit_opcode(cbuf, $primary + dst_enc); 2009 emit_d32(cbuf, src_con); 2010 } 2011 %} 2012 2013 2014 // Encode a reg-reg copy. If it is useless, then empty encoding. 2015 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 2016 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2017 %} 2018 2019 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 2020 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2021 %} 2022 2023 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 2024 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2025 %} 2026 2027 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 2028 $$$emit8$primary; 2029 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2030 %} 2031 2032 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 2033 $$$emit8$secondary; 2034 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2035 %} 2036 2037 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 2038 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2039 %} 2040 2041 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 2042 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 2043 %} 2044 2045 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 2046 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); 2047 %} 2048 2049 enc_class Con32 (immI src) %{ // Con32(storeImmI) 2050 // Output immediate 2051 $$$emit32$src$$constant; 2052 %} 2053 2054 enc_class Con32FPR_as_bits(immFPR src) 
%{ // storeF_imm 2055 // Output Float immediate bits 2056 jfloat jf = $src$$constant; 2057 int jf_as_bits = jint_cast( jf ); 2058 emit_d32(cbuf, jf_as_bits); 2059 %} 2060 2061 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 2062 // Output Float immediate bits 2063 jfloat jf = $src$$constant; 2064 int jf_as_bits = jint_cast( jf ); 2065 emit_d32(cbuf, jf_as_bits); 2066 %} 2067 2068 enc_class Con16 (immI src) %{ // Con16(storeImmI) 2069 // Output immediate 2070 $$$emit16$src$$constant; 2071 %} 2072 2073 enc_class Con_d32(immI src) %{ 2074 emit_d32(cbuf,$src$$constant); 2075 %} 2076 2077 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 2078 // Output immediate memory reference 2079 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 2080 emit_d32(cbuf, 0x00); 2081 %} 2082 2083 enc_class lock_prefix( ) %{ 2084 if( os::is_MP() ) 2085 emit_opcode(cbuf,0xF0); // [Lock] 2086 %} 2087 2088 // Cmp-xchg long value. 2089 // Note: we need to swap rbx, and rcx before and after the 2090 // cmpxchg8 instruction because the instruction uses 2091 // rcx as the high order word of the new value to store but 2092 // our register encoding uses rbx,. 
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // CMPXCHG8B takes the new-value high word in ECX and low word in EBX;
    // our register binding supplies them swapped, so exchange EBX/ECX
    // around the instruction (see note above this enc_class).
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // Atomic compare-and-exchange of a 32-bit value in memory: LOCK CMPXCHG [Eptr].
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize the "not-equal" condition flag as a boolean register value:
  // res = 0 if ZF is clear (jne taken), 1 if ZF is set.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0   (MOV rather than XOR so the flags being tested survive)
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail  (skip the 5-byte MOV res,1 below)
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  // Emit the ModRM/SIB/displacement bytes for a register-memory form.
  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // As RegMem, but addresses the HIGH half of a long register pair and
  // the high word of the memory operand (displacement + 4).
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Shift a long left or right by a constant 1..31 bits.
  // $tertiary is SHLD (0xA4) for a left shift or SHRD for a right shift;
  // it moves cnt bits across the register pair, then $primary/$secondary
  // (SHL/SHR family) shift the remaining half.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by a constant 32..63 bits:
  // lo = hi >> (cnt-32); hi = hi >> 31 (sign fill).
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move lo = hi
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical shift of a long by a constant 32..63 bits:
  // move one half into the other, shift by cnt-32, clear the vacated half.
  // $secondary (0x5 = SHR) selects which half is which.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33); // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    // disp_for_half selects which 4-byte half of the double is addressed.
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  // As RMopc_Mem_no_oop, but the displacement may carry relocation info.
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // LEA dst, [src0 + src1] encoded as a base register plus constant displacement.
  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // dst = min(dst, src): compare, then conditionally skip the move.
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move (JL, 2-byte short displacement)
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst, src): compare, then conditionally skip the move.
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move (JG, 2-byte short displacement)
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // Two's-complement negate: NEG dst (F7 /3).
  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  // SETL dst: set byte register to 1 if "less", else 0 (0F 9C).
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free p = (p < q) ? p+y : p, via SBB mask trick:
  // tmp becomes all-ones iff the subtract borrowed.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Shift a long left by a variable count in ECX (0..63).
  // For counts >= 32, pre-move lo into hi and clear lo; SHLD/SHL
  // then use only the low 5 bits of the count.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Logical right shift of a long by a variable count in ECX (0..63);
  // same structure as shift_left_long, mirrored.
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Arithmetic right shift of a long by a variable count in ECX (0..63);
  // the >=32 case sign-fills hi with SAR hi,31 instead of clearing it.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  // x87 arithmetic against a stack register: $primary/$secondary come
  // from the instruction's opcode declaration.
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // Store TOS into ST(i) and pop the x87 stack.
  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Push ST(i-1) onto the x87 stack (register encodings are 1-based here).
  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Load the 80-bit strict-fp subnormal bias constant #1 from StubRoutines
  // and multiply it into ST(dst), popping the constant.
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Same as strictfp_bias1 but with subnormal bias constant #2
  // (the inverse scaling step).
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
2443 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2444 // Opcode already emitted 2445 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2446 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2447 emit_d32(cbuf, $dst$$disp); // Displacement 2448 %} 2449 2450 // Push the integer in stackSlot 'src' onto FP-stack 2451 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2452 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2453 %} 2454 2455 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2456 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2457 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2458 %} 2459 2460 // Same as Pop_Mem_F except for opcode 2461 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2462 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2463 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2464 %} 2465 2466 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2467 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2468 emit_d8( cbuf, 0xD8+$dst$$reg ); 2469 %} 2470 2471 enc_class Push_Reg_FPR( regFPR dst ) %{ 2472 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2473 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2474 %} 2475 2476 // Push FPU's float to a stack-slot, and pop FPU-stack 2477 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2478 int pop = 0x02; 2479 if ($src$$reg != FPR1L_enc) { 2480 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2481 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2482 pop = 0x03; 2483 } 2484 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2485 %} 2486 2487 // Push FPU's double to a stack-slot, and pop FPU-stack 2488 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2489 int pop = 0x02; 2490 if ($src$$reg != FPR1L_enc) { 2491 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2492 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2493 pop = 0x03; 2494 } 2495 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2496 %} 2497 2498 // Push FPU's double to 
a FPU-stack-slot, and pop FPU-stack 2499 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2500 int pop = 0xD0 - 1; // -1 since we skip FLD 2501 if ($src$$reg != FPR1L_enc) { 2502 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2503 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2504 pop = 0xD8; 2505 } 2506 emit_opcode( cbuf, 0xDD ); 2507 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2508 %} 2509 2510 2511 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2512 // load dst in FPR0 2513 emit_opcode( cbuf, 0xD9 ); 2514 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2515 if ($src$$reg != FPR1L_enc) { 2516 // fincstp 2517 emit_opcode (cbuf, 0xD9); 2518 emit_opcode (cbuf, 0xF7); 2519 // swap src with FPR1: 2520 // FXCH FPR1 with src 2521 emit_opcode(cbuf, 0xD9); 2522 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2523 // fdecstp 2524 emit_opcode (cbuf, 0xD9); 2525 emit_opcode (cbuf, 0xF6); 2526 } 2527 %} 2528 2529 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2530 MacroAssembler _masm(&cbuf); 2531 __ subptr(rsp, 8); 2532 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2533 __ fld_d(Address(rsp, 0)); 2534 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2535 __ fld_d(Address(rsp, 0)); 2536 %} 2537 2538 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2539 MacroAssembler _masm(&cbuf); 2540 __ subptr(rsp, 4); 2541 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2542 __ fld_s(Address(rsp, 0)); 2543 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2544 __ fld_s(Address(rsp, 0)); 2545 %} 2546 2547 enc_class Push_ResultD(regD dst) %{ 2548 MacroAssembler _masm(&cbuf); 2549 __ fstp_d(Address(rsp, 0)); 2550 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2551 __ addptr(rsp, 8); 2552 %} 2553 2554 enc_class Push_ResultF(regF dst, immI d8) %{ 2555 MacroAssembler _masm(&cbuf); 2556 __ fstp_s(Address(rsp, 0)); 2557 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2558 __ addptr(rsp, $d8$$constant); 2559 %} 2560 2561 enc_class Push_SrcD(regD src) %{ 2562 MacroAssembler _masm(&cbuf); 2563 __ subptr(rsp, 8); 
// --- tail of an enc_class whose header precedes this chunk; presumably it
// --- spills an XMM double to [SP] and reloads it onto the x87 stack,
// --- like push_xmm_to_fpr1 below -- TODO confirm against preceding lines.
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Reserve an 8-byte stack temporary: SP -= 8.
enc_class push_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
%}

// Release the 8-byte stack temporary: SP += 8.
enc_class pop_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ addptr(rsp, 8);
%}

// Store an XMM double into the stack temp at [SP], then load it onto
// the x87 stack (it becomes the new top-of-stack, FPR1/ST0).
enc_class push_xmm_to_fpr1(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Rotate src to the x87 top-of-stack (FINCSTP / FXCH / FDECSTP) when it
// is not already FPR1; the actual result store is emitted elsewhere
// (see the commented-out FSTP below).
enc_class Push_Result_Mod_DPR( regDPR src) %{
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
  // // following asm replaced with Pop_Reg_F or Pop_Mem_F
  // // FSTP   FPR$dst$$reg
  // emit_opcode( cbuf, 0xDD );
  // emit_d8( cbuf, 0xD8+$dst$$reg );
%}

// Copy the x87 status word into EFLAGS (FNSTSW AX / SAHF), then skip
// the following 5 bytes of code when parity (unordered result) is clear.
enc_class fnstsw_sahf_skip_parity() %{
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jnp  ::skip
  emit_opcode( cbuf, 0x7B );
  emit_opcode( cbuf, 0x05 );
%}

// Remainder via FPREM.  FPREM only performs a partial reduction per
// iteration, so loop (JP back over the sequence) until the status bit
// that SAHF maps onto parity clears.
enc_class emitModDPR() %{
  // fprem must be iterative
  //  :: loop
  // fprem
  emit_opcode( cbuf, 0xD9 );
  emit_opcode( cbuf, 0xF8 );
  // wait
  emit_opcode( cbuf, 0x9b );
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jp  ::loop  (rel32 = -12, back to the fprem)
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0x8A );
  emit_opcode( cbuf, 0xF4 );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
%}

// Convert x87 compare status into integer EFLAGS; an unordered (NaN)
// compare is forced to look like "less than" by setting AH=1 (carry).
enc_class fpu_flags() %{
  // fnstsw_ax
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // test ax,0x0400
  emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
  emit_opcode( cbuf, 0xA9 );
  emit_d16   ( cbuf, 0x0400 );
  // // // This sequence works, but stalls for 12-16 cycles on PPro
  // // test rax,0x0400
  // emit_opcode( cbuf, 0xA9 );
  // emit_d32  ( cbuf, 0x00000400 );
  //
  // jz exit (no unordered comparison)
  emit_opcode( cbuf, 0x74 );
  emit_d8    ( cbuf, 0x02 );
  // mov ah,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8    ( cbuf, 0x01 );
  // sahf
  emit_opcode( cbuf, 0x9E);
%}

// P6 variant: after the compare has already set EFLAGS, patch flags so
// a NaN (parity set) reads as "less than" (carry set) instead of
// unordered.
enc_class cmpF_P6_fixup() %{
  // Fixup the integer flags in case comparison involved a NaN
  //
  // JNP exit (no unordered comparison, P-flag is set by NaN)
  emit_opcode( cbuf, 0x7B );
  emit_d8    ( cbuf, 0x03 );
  // MOV AH,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8    ( cbuf, 0x01 );
  // SAHF
  emit_opcode( cbuf, 0x9E);
  // NOP     // target for branch to avoid branch to branch
  emit_opcode( cbuf, 0x90);
%}

// Pseudo-code for CmpF_Result below:
// fnstsw_ax();
// sahf();
// movl(dst, nan_result);
// jcc(Assembler::parity, exit);
// movl(dst, less_result);
// jcc(Assembler::below, exit);
// movl(dst, equal_result);
// jcc(Assembler::equal, exit);
// movl(dst, greater_result);

// less_result     =  1;
// greater_result  = -1;
// equal_result    = 0;
// nan_result      = -1;
// NOTE(review): the constants table above disagrees with the emitted
// code below, which stores -1 for less, 0 for equal, +1 for greater;
// the emitted code is authoritative.

// Materialize a float-compare result as an int in dst:
// -1 for NaN or less, 0 for equal, +1 for greater.
enc_class CmpF_Result(rRegI dst) %{
  // fnstsw_ax();
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // sahf
  emit_opcode( cbuf, 0x9E);
  // movl(dst, nan_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::parity, exit);
  emit_opcode( cbuf, 0x7A );
  emit_d8    ( cbuf, 0x13 );
  // movl(dst, less_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::below, exit);
  emit_opcode( cbuf, 0x72 );
  emit_d8    ( cbuf, 0x0C );
  // movl(dst, equal_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, 0 );
  // jcc(Assembler::equal, exit);
  emit_opcode( cbuf, 0x74 );
  emit_d8    ( cbuf, 0x05 );
  // movl(dst, greater_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, 1 );
%}


// Compare the longs and set flags
// BROKEN!  Do Not use as-is
enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
  // CMP    $src1.hi,$src2.hi
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  // JNE,s  done
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 2 );
  // CMP    $src1.lo,$src2.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
// done:
%}

// Sign-extend a 32-bit int into a long register pair:
// dst.lo = src; dst.hi = src; dst.hi >>= 31 (arithmetic).
enc_class convert_int_long( regL dst, rRegI src ) %{
  // mov $dst.lo,$src
  int dst_encoding = $dst$$reg;
  int src_encoding = $src$$reg;
  encode_Copy( cbuf, dst_encoding , src_encoding );
  // mov $dst.hi,$src
  encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
  // sar $dst.hi,31
  emit_opcode( cbuf, 0xC1 );
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
  emit_d8(cbuf, 0x1F );
%}

// Long -> x87 double: push the long onto the CPU stack, FILD the 64-bit
// value at [SP], then pop the 8 temp bytes back off the CPU stack.
enc_class convert_long_double( eRegL src ) %{
  // push $src.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
  // push $src.lo
  emit_opcode(cbuf, 0x50+$src$$reg  );
  // fild 64-bits at [SP]
  emit_opcode(cbuf,0xdf);
  emit_d8(cbuf, 0x6C);
  emit_d8(cbuf, 0x24);
  emit_d8(cbuf, 0x00);
  // pop stack
  emit_opcode(cbuf, 0x83); // add  SP, #8
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 0x8);
%}

// High half of a widening multiply: IMUL leaves the product's high word
// in EDX, which is then arithmetic-shifted by (cnt - 32) when cnt > 32.
enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
  // IMUL EDX:EAX,$src1
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
  // SAR EDX,$cnt-32
  int shift_count = ((int)$cnt$$constant) - 32;
  if (shift_count > 0) {
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, $dst$$reg );
    emit_d8(cbuf, shift_count);
  }
%}

// this version doesn't have add sp, 8
// (the 8 temp bytes are left on the CPU stack for the caller to pop)
enc_class convert_long_double2( eRegL src ) %{
  // push $src.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
  // push $src.lo
  emit_opcode(cbuf, 0x50+$src$$reg  );
  // fild 64-bits at [SP]
  emit_opcode(cbuf,0xdf);
  emit_d8(cbuf, 0x6C);
  emit_d8(cbuf, 0x24);
  emit_d8(cbuf, 0x00);
%}

// Widening signed multiply of two ints into EDX:EAX.
enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic idea: long = (long)int * (long)int
  // IMUL EDX:EAX, src
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x5, $src$$reg);
%}

// Widening unsigned multiply of two ints into EDX:EAX.
enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
  // MUL EDX:EAX, src
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, $src$$reg);
%}

// Full 64x64->64 schoolbook multiply (dst is EDX:EAX).
enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  // MOV    $tmp,$src.lo
  encode_Copy( cbuf, $tmp$$reg, $src$$reg );
  // IMUL   $tmp,EDX
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0xAF );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  // MOV    EDX,$src.hi
  encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
  // IMUL   EDX,EAX
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0xAF );
  emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // ADD    $tmp,EDX
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  // MUL   EDX:EAX,$src.lo
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, $src$$reg );
  // ADD    EDX,ESI  ($tmp need not be ESI; comment reflects a common allocation)
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
%}

// Multiply a long by a small constant (0..127); same schoolbook scheme
// as long_multiply with the constant folded into both partial products.
enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
  // Basic idea: lo(result) = lo(src * y_lo)
  //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
  // IMUL   $tmp,EDX,$src
  emit_opcode( cbuf, 0x6B );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  emit_d8( cbuf, (int)$src$$constant );
  // MOV    EDX,$src
  emit_opcode(cbuf, 0xB8 + EDX_enc);
  emit_d32( cbuf, (int)$src$$constant );
  // MUL   EDX:EAX,EDX
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, EDX_enc );
  // ADD    EDX,ESI  ($tmp need not be ESI; comment reflects a common allocation)
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
%}

// 64-bit divide: push both long operands and call SharedRuntime::ldiv,
// then pop the four argument words.
// NOTE(review): HIGH_FROM_LOW is applied to the whole PUSH opcode byte
// (0x50+reg); this is only equivalent to 0x50+HIGH_FROM_LOW(reg) if the
// macro is a simple additive mapping -- TODO confirm macro definition.
enc_class long_div( eRegL src1, eRegL src2 ) %{
  // PUSH src1.hi
  emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
  // PUSH src1.lo
  emit_opcode(cbuf,               0x50+$src1$$reg  );
  // PUSH src2.hi
  emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
  // PUSH src2.lo
  emit_opcode(cbuf,               0x50+$src2$$reg  );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Restore stack
  emit_opcode(cbuf, 0x83); // add  SP, #framesize
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 4*4);
%}

// 64-bit remainder: identical to long_div above but calls
// SharedRuntime::lrem (same HIGH_FROM_LOW caveat applies).
enc_class long_mod( eRegL src1, eRegL src2 ) %{
  // PUSH src1.hi
  emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
  // PUSH src1.lo
  emit_opcode(cbuf,               0x50+$src1$$reg  );
  // PUSH src2.hi
  emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
  // PUSH src2.lo
  emit_opcode(cbuf,               0x50+$src2$$reg  );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Restore stack
  emit_opcode(cbuf, 0x83); // add  SP, #framesize
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 4*4);
%}

// Set ZF iff the long is zero: tmp = src.lo | src.hi.
enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
  // MOV   $tmp,$src.lo
  emit_opcode(cbuf, 0x8B);
  emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
  // OR    $tmp,$src.hi
  emit_opcode(cbuf, 0x0B);
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
%}

// Long equality compare: compare the low words, and only if they are
// equal fall through to compare the high words (flags reflect the
// deciding compare).
enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
  // CMP    $src1.lo,$src2.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
  // JNE,s  skip
  emit_cc(cbuf, 0x70, 0x5);
  emit_d8(cbuf,2);
  // CMP    $src1.hi,$src2.hi
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
%}

// Signed long compare via CMP lo / SBB hi into a scratch register.
enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
  // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
  // MOV    $tmp,$src1.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
  // SBB    $tmp,$src2.hi\t! Compute flags for long compare
  emit_opcode( cbuf, 0x1B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
%}

// Compare a long against zero: computes 0 - src via XOR/CMP/SBB.
enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
  // XOR    $tmp,$tmp
  emit_opcode(cbuf,0x33);  // XOR
  emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
  // CMP    $tmp,$src.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
  // SBB    $tmp,$src.hi
  emit_opcode( cbuf, 0x1B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
%}

// Sniff, sniff...
// ...smells like Gnu Superoptimizer
// Branchless two's-complement negate of a register pair:
// NEG hi; NEG lo; SBB hi,0 propagates the borrow from the low word.
enc_class neg_long( eRegL dst ) %{
  emit_opcode(cbuf,0xF7);    // NEG hi
  emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
  emit_opcode(cbuf,0xF7);    // NEG lo
  emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
  emit_opcode(cbuf,0x83);    // SBB hi,0
  emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
  emit_d8    (cbuf,0 );
%}

// POP EDX.
enc_class enc_pop_rdx() %{
  emit_opcode(cbuf,0x5A);
%}

// Jump to the shared rethrow stub (relocated runtime call target).
enc_class enc_rethrow() %{
  cbuf.set_insts_mark();
  emit_opcode(cbuf, 0xE9);        // jmp    entry
  emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                 runtime_call_Relocation::spec(), RELOC_IMM32 );
%}


// Convert a double to an int.  Java semantics require we do complex
// manglelations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware throws an exception which
// patches up the correct value directly to the stack.
enc_class DPR2I_encoding( regDPR src ) %{
  // Flip to round-to-zero mode.  We attempted to allow invalid-op
  // exceptions here, so that a NAN or other corner-case value will
  // thrown an exception (but normal values get converted at full speed).
  // However, I2C adapters and other float-stack manglers leave pending
  // invalid-op exceptions hanging.  We would have to clear them before
  // enabling them and that is more expensive than just testing for the
  // invalid value Intel stores down in the corner cases.
  emit_opcode(cbuf,0xD9);            // FLDCW  trunc
  emit_opcode(cbuf,0x2D);
  emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
  // Allocate a word
  emit_opcode(cbuf,0x83);            // SUB ESP,4
  emit_opcode(cbuf,0xEC);
  emit_d8(cbuf,0x04);
  // Encoding assumes a double has been pushed into FPR0.
  // Store down the double as an int, popping the FPU stack
  emit_opcode(cbuf,0xDB);            // FISTP [ESP]
  emit_opcode(cbuf,0x1C);
  emit_d8(cbuf,0x24);
  // Restore the rounding mode; mask the exception
  emit_opcode(cbuf,0xD9);            // FLDCW std/24-bit mode
  emit_opcode(cbuf,0x2D);
  emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

  // Load the converted int; adjust CPU stack.  0x80000000 is the x87
  // "invalid" sentinel for out-of-range/NaN, so that value (and only
  // that value) takes the slow path through the d2i wrapper.
  emit_opcode(cbuf,0x58);       // POP EAX
  emit_opcode(cbuf,0x3D);       // CMP EAX,imm
  emit_d32   (cbuf,0x80000000); //         0x80000000
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07);       // Size of slow_call
  // Push src onto stack slow-path
  emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
  emit_d8    (cbuf,0xC0-1+$src$$reg );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Carry on here...
%}

// Double -> long.  Same trunc-rounding-mode scheme as DPR2I_encoding;
// a result of 0x80000000:00000000 (EDX:EAX) takes the slow path via
// the d2l wrapper.
enc_class DPR2L_encoding( regDPR src ) %{
  emit_opcode(cbuf,0xD9);            // FLDCW  trunc
  emit_opcode(cbuf,0x2D);
  emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
  // Allocate a word
  emit_opcode(cbuf,0x83);            // SUB ESP,8
  emit_opcode(cbuf,0xEC);
  emit_d8(cbuf,0x08);
  // Encoding assumes a double has been pushed into FPR0.
  // Store down the double as a long, popping the FPU stack
  emit_opcode(cbuf,0xDF);            // FISTP [ESP]
  emit_opcode(cbuf,0x3C);
  emit_d8(cbuf,0x24);
  // Restore the rounding mode; mask the exception
  emit_opcode(cbuf,0xD9);            // FLDCW std/24-bit mode
  emit_opcode(cbuf,0x2D);
  emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

  // Load the converted int; adjust CPU stack
  emit_opcode(cbuf,0x58);       // POP EAX
  emit_opcode(cbuf,0x5A);       // POP EDX
  emit_opcode(cbuf,0x81);       // CMP EDX,imm
  emit_d8    (cbuf,0xFA);       // rdx
  emit_d32   (cbuf,0x80000000); //         0x80000000
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07+4);     // Size of slow_call
  emit_opcode(cbuf,0x85);       // TEST EAX,EAX
  emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07);       // Size of slow_call
  // Push src onto stack slow-path
  emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
  emit_d8    (cbuf,0xC0-1+$src$$reg );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Carry on here...
%}

enc_class FMul_ST_reg( eRegFPR src1 ) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FMUL   ST,$src  /* D8 C8+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC8 + $src1$$reg);
%}

enc_class FAdd_ST_reg( eRegFPR src2 ) %{
  // FADD   ST,src2  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src2$$reg);
  //could use FADDP  src2,fpST  /* DE C0+i */
%}

enc_class FAddP_reg_ST( eRegFPR src2 ) %{
  // FADDP src2,ST  /* DE C0+i */
  emit_opcode(cbuf, 0xDE);
  emit_opcode(cbuf, 0xC0 + $src2$$reg);
%}

// Fused subtract-then-divide against two x87 stack registers; the
// running value stays in ST.
enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
  // Operand has been loaded into fp ST (stack top)
  // FSUB   ST,$src1
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xE0 + $src1$$reg);

  // FDIV
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xF0 + $src2$$reg);
%}

enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FADD   ST,$src  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src1$$reg);

  // FMUL  ST,src2  /* D8 C*+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC8 + $src2$$reg);
%}


enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FADD   ST,$src  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src1$$reg);

  // FMULP  src2,ST  /* DE C8+i */
  emit_opcode(cbuf, 0xDE);
  emit_opcode(cbuf, 0xC8 + $src2$$reg);
%}

// Atomically load the volatile long: a single 64-bit FILD from memory,
// then store the x87 value into the destination stack slot.
enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
  emit_opcode(cbuf,0xDF);
  int rm_byte_opcode = 0x05;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
  store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
  cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
  emit_opcode(cbuf,0xDF);
  int rm_byte_opcode = 0x07;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

// Safepoint Poll.  This polls the safepoint page, and causes an
// exception if it is not readable. Unfortunately, it kills the condition code
// in the process.
// We currently use TESTL [spp],EDI
// A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0

enc_class Safepoint_Poll() %{
  cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
  emit_opcode(cbuf,0x85);
  emit_rm (cbuf, 0x0, 0x7, 0x5);
  emit_d32(cbuf, (intptr_t)os::get_polling_page());
%}
%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   | (to get allocators register number
//  G  Owned by    |        |        v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        |  locks |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        |  spills|  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by      +--------+
//    CALLEE       | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  // (tables are indexed by ideal register type; lo/hi give the register
  // pair for the low and high halves of the value)
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values (Java calls)
  // NOTE: unlike c_return_value above, Java float results already use
  // XMM0 at UseSSE>=1; doubles still require UseSSE>=2.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);           // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required
// size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate that fits in a signed byte
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate that fits in a signed 16-bit word
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift counts within one word
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift counts that cross the word boundary
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// NOTE(review): duplicates immI1 above -- both match the constant 1.
operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: any value representable as a sign-extended 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 path, UseSSE<=1)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 path)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 path, UseSSE == 0)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE path)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0 (bit-pattern compare)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register (byte-addressable registers EAX..EDX)
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

// (next operand definition continues past this chunk)
operand
eCXRegI(xRegI reg) %{ 3719 constraint(ALLOC_IN_RC(ecx_reg)); 3720 match(reg); 3721 match(rRegI); 3722 3723 format %{ "ECX" %} 3724 interface(REG_INTER); 3725 %} 3726 3727 operand eDXRegI(xRegI reg) %{ 3728 constraint(ALLOC_IN_RC(edx_reg)); 3729 match(reg); 3730 match(rRegI); 3731 3732 format %{ "EDX" %} 3733 interface(REG_INTER); 3734 %} 3735 3736 operand eDIRegI(xRegI reg) %{ 3737 constraint(ALLOC_IN_RC(edi_reg)); 3738 match(reg); 3739 match(rRegI); 3740 3741 format %{ "EDI" %} 3742 interface(REG_INTER); 3743 %} 3744 3745 operand naxRegI() %{ 3746 constraint(ALLOC_IN_RC(nax_reg)); 3747 match(RegI); 3748 match(eCXRegI); 3749 match(eDXRegI); 3750 match(eSIRegI); 3751 match(eDIRegI); 3752 3753 format %{ %} 3754 interface(REG_INTER); 3755 %} 3756 3757 operand nadxRegI() %{ 3758 constraint(ALLOC_IN_RC(nadx_reg)); 3759 match(RegI); 3760 match(eBXRegI); 3761 match(eCXRegI); 3762 match(eSIRegI); 3763 match(eDIRegI); 3764 3765 format %{ %} 3766 interface(REG_INTER); 3767 %} 3768 3769 operand ncxRegI() %{ 3770 constraint(ALLOC_IN_RC(ncx_reg)); 3771 match(RegI); 3772 match(eAXRegI); 3773 match(eDXRegI); 3774 match(eSIRegI); 3775 match(eDIRegI); 3776 3777 format %{ %} 3778 interface(REG_INTER); 3779 %} 3780 3781 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3782 // // 3783 operand eSIRegI(xRegI reg) %{ 3784 constraint(ALLOC_IN_RC(esi_reg)); 3785 match(reg); 3786 match(rRegI); 3787 3788 format %{ "ESI" %} 3789 interface(REG_INTER); 3790 %} 3791 3792 // Pointer Register 3793 operand anyRegP() %{ 3794 constraint(ALLOC_IN_RC(any_reg)); 3795 match(RegP); 3796 match(eAXRegP); 3797 match(eBXRegP); 3798 match(eCXRegP); 3799 match(eDIRegP); 3800 match(eRegP); 3801 3802 format %{ %} 3803 interface(REG_INTER); 3804 %} 3805 3806 operand eRegP() %{ 3807 constraint(ALLOC_IN_RC(int_reg)); 3808 match(RegP); 3809 match(eAXRegP); 3810 match(eBXRegP); 3811 match(eCXRegP); 3812 match(eDIRegP); 3813 3814 format %{ %} 3815 interface(REG_INTER); 3816 %} 3817 3818 // 
On windows95, EBP is not safe to use for implicit null tests. 3819 operand eRegP_no_EBP() %{ 3820 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3821 match(RegP); 3822 match(eAXRegP); 3823 match(eBXRegP); 3824 match(eCXRegP); 3825 match(eDIRegP); 3826 3827 op_cost(100); 3828 format %{ %} 3829 interface(REG_INTER); 3830 %} 3831 3832 operand naxRegP() %{ 3833 constraint(ALLOC_IN_RC(nax_reg)); 3834 match(RegP); 3835 match(eBXRegP); 3836 match(eDXRegP); 3837 match(eCXRegP); 3838 match(eSIRegP); 3839 match(eDIRegP); 3840 3841 format %{ %} 3842 interface(REG_INTER); 3843 %} 3844 3845 operand nabxRegP() %{ 3846 constraint(ALLOC_IN_RC(nabx_reg)); 3847 match(RegP); 3848 match(eCXRegP); 3849 match(eDXRegP); 3850 match(eSIRegP); 3851 match(eDIRegP); 3852 3853 format %{ %} 3854 interface(REG_INTER); 3855 %} 3856 3857 operand pRegP() %{ 3858 constraint(ALLOC_IN_RC(p_reg)); 3859 match(RegP); 3860 match(eBXRegP); 3861 match(eDXRegP); 3862 match(eSIRegP); 3863 match(eDIRegP); 3864 3865 format %{ %} 3866 interface(REG_INTER); 3867 %} 3868 3869 // Special Registers 3870 // Return a pointer value 3871 operand eAXRegP(eRegP reg) %{ 3872 constraint(ALLOC_IN_RC(eax_reg)); 3873 match(reg); 3874 format %{ "EAX" %} 3875 interface(REG_INTER); 3876 %} 3877 3878 // Used in AtomicAdd 3879 operand eBXRegP(eRegP reg) %{ 3880 constraint(ALLOC_IN_RC(ebx_reg)); 3881 match(reg); 3882 format %{ "EBX" %} 3883 interface(REG_INTER); 3884 %} 3885 3886 // Tail-call (interprocedural jump) to interpreter 3887 operand eCXRegP(eRegP reg) %{ 3888 constraint(ALLOC_IN_RC(ecx_reg)); 3889 match(reg); 3890 format %{ "ECX" %} 3891 interface(REG_INTER); 3892 %} 3893 3894 operand eSIRegP(eRegP reg) %{ 3895 constraint(ALLOC_IN_RC(esi_reg)); 3896 match(reg); 3897 format %{ "ESI" %} 3898 interface(REG_INTER); 3899 %} 3900 3901 // Used in rep stosw 3902 operand eDIRegP(eRegP reg) %{ 3903 constraint(ALLOC_IN_RC(edi_reg)); 3904 match(reg); 3905 format %{ "EDI" %} 3906 interface(REG_INTER); 3907 %} 3908 3909 operand eRegL() %{ 
3910 constraint(ALLOC_IN_RC(long_reg)); 3911 match(RegL); 3912 match(eADXRegL); 3913 3914 format %{ %} 3915 interface(REG_INTER); 3916 %} 3917 3918 operand eADXRegL( eRegL reg ) %{ 3919 constraint(ALLOC_IN_RC(eadx_reg)); 3920 match(reg); 3921 3922 format %{ "EDX:EAX" %} 3923 interface(REG_INTER); 3924 %} 3925 3926 operand eBCXRegL( eRegL reg ) %{ 3927 constraint(ALLOC_IN_RC(ebcx_reg)); 3928 match(reg); 3929 3930 format %{ "EBX:ECX" %} 3931 interface(REG_INTER); 3932 %} 3933 3934 // Special case for integer high multiply 3935 operand eADXRegL_low_only() %{ 3936 constraint(ALLOC_IN_RC(eadx_reg)); 3937 match(RegL); 3938 3939 format %{ "EAX" %} 3940 interface(REG_INTER); 3941 %} 3942 3943 // Flags register, used as output of compare instructions 3944 operand eFlagsReg() %{ 3945 constraint(ALLOC_IN_RC(int_flags)); 3946 match(RegFlags); 3947 3948 format %{ "EFLAGS" %} 3949 interface(REG_INTER); 3950 %} 3951 3952 // Flags register, used as output of FLOATING POINT compare instructions 3953 operand eFlagsRegU() %{ 3954 constraint(ALLOC_IN_RC(int_flags)); 3955 match(RegFlags); 3956 3957 format %{ "EFLAGS_U" %} 3958 interface(REG_INTER); 3959 %} 3960 3961 operand eFlagsRegUCF() %{ 3962 constraint(ALLOC_IN_RC(int_flags)); 3963 match(RegFlags); 3964 predicate(false); 3965 3966 format %{ "EFLAGS_U_CF" %} 3967 interface(REG_INTER); 3968 %} 3969 3970 // Condition Code Register used by long compare 3971 operand flagsReg_long_LTGE() %{ 3972 constraint(ALLOC_IN_RC(int_flags)); 3973 match(RegFlags); 3974 format %{ "FLAGS_LTGE" %} 3975 interface(REG_INTER); 3976 %} 3977 operand flagsReg_long_EQNE() %{ 3978 constraint(ALLOC_IN_RC(int_flags)); 3979 match(RegFlags); 3980 format %{ "FLAGS_EQNE" %} 3981 interface(REG_INTER); 3982 %} 3983 operand flagsReg_long_LEGT() %{ 3984 constraint(ALLOC_IN_RC(int_flags)); 3985 match(RegFlags); 3986 format %{ "FLAGS_LEGT" %} 3987 interface(REG_INTER); 3988 %} 3989 3990 // Float register operands 3991 operand regDPR() %{ 3992 predicate( UseSSE < 2 ); 
3993 constraint(ALLOC_IN_RC(fp_dbl_reg)); 3994 match(RegD); 3995 match(regDPR1); 3996 match(regDPR2); 3997 format %{ %} 3998 interface(REG_INTER); 3999 %} 4000 4001 operand regDPR1(regDPR reg) %{ 4002 predicate( UseSSE < 2 ); 4003 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4004 match(reg); 4005 format %{ "FPR1" %} 4006 interface(REG_INTER); 4007 %} 4008 4009 operand regDPR2(regDPR reg) %{ 4010 predicate( UseSSE < 2 ); 4011 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4012 match(reg); 4013 format %{ "FPR2" %} 4014 interface(REG_INTER); 4015 %} 4016 4017 operand regnotDPR1(regDPR reg) %{ 4018 predicate( UseSSE < 2 ); 4019 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4020 match(reg); 4021 format %{ %} 4022 interface(REG_INTER); 4023 %} 4024 4025 // Float register operands 4026 operand regFPR() %{ 4027 predicate( UseSSE < 2 ); 4028 constraint(ALLOC_IN_RC(fp_flt_reg)); 4029 match(RegF); 4030 match(regFPR1); 4031 format %{ %} 4032 interface(REG_INTER); 4033 %} 4034 4035 // Float register operands 4036 operand regFPR1(regFPR reg) %{ 4037 predicate( UseSSE < 2 ); 4038 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4039 match(reg); 4040 format %{ "FPR1" %} 4041 interface(REG_INTER); 4042 %} 4043 4044 // XMM Float register operands 4045 operand regF() %{ 4046 predicate( UseSSE>=1 ); 4047 constraint(ALLOC_IN_RC(float_reg_legacy)); 4048 match(RegF); 4049 format %{ %} 4050 interface(REG_INTER); 4051 %} 4052 4053 // XMM Double register operands 4054 operand regD() %{ 4055 predicate( UseSSE>=2 ); 4056 constraint(ALLOC_IN_RC(double_reg_legacy)); 4057 match(RegD); 4058 format %{ %} 4059 interface(REG_INTER); 4060 %} 4061 4062 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM) 4063 // runtime code generation via reg_class_dynamic. 
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand (no offset; disp is 0)
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed; encodings are the x86 condition-code nibbles)
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o");   // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares; conditions are swapped relative
// to cmpOp (e.g. "less" encodes as "g") for the commuted operand order.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable-size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
4627 4628 // Integer ALU reg operation 4629 pipe_class ialu_reg(rRegI dst) %{ 4630 single_instruction; 4631 dst : S4(write); 4632 dst : S3(read); 4633 DECODE : S0; // any decoder 4634 ALU : S3; // any alu 4635 %} 4636 4637 // Long ALU reg operation 4638 pipe_class ialu_reg_long(eRegL dst) %{ 4639 instruction_count(2); 4640 dst : S4(write); 4641 dst : S3(read); 4642 DECODE : S0(2); // any 2 decoders 4643 ALU : S3(2); // both alus 4644 %} 4645 4646 // Integer ALU reg operation using big decoder 4647 pipe_class ialu_reg_fat(rRegI dst) %{ 4648 single_instruction; 4649 dst : S4(write); 4650 dst : S3(read); 4651 D0 : S0; // big decoder only 4652 ALU : S3; // any alu 4653 %} 4654 4655 // Long ALU reg operation using big decoder 4656 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4657 instruction_count(2); 4658 dst : S4(write); 4659 dst : S3(read); 4660 D0 : S0(2); // big decoder only; twice 4661 ALU : S3(2); // any 2 alus 4662 %} 4663 4664 // Integer ALU reg-reg operation 4665 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4666 single_instruction; 4667 dst : S4(write); 4668 src : S3(read); 4669 DECODE : S0; // any decoder 4670 ALU : S3; // any alu 4671 %} 4672 4673 // Long ALU reg-reg operation 4674 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4675 instruction_count(2); 4676 dst : S4(write); 4677 src : S3(read); 4678 DECODE : S0(2); // any 2 decoders 4679 ALU : S3(2); // both alus 4680 %} 4681 4682 // Integer ALU reg-reg operation 4683 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4684 single_instruction; 4685 dst : S4(write); 4686 src : S3(read); 4687 D0 : S0; // big decoder only 4688 ALU : S3; // any alu 4689 %} 4690 4691 // Long ALU reg-reg operation 4692 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4693 instruction_count(2); 4694 dst : S4(write); 4695 src : S3(read); 4696 D0 : S0(2); // big decoder only; twice 4697 ALU : S3(2); // both alus 4698 %} 4699 4700 // Integer ALU reg-mem operation 4701 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4702 single_instruction; 4703 dst : S5(write); 4704 mem : S3(read); 4705 D0 : S0; // big decoder only 4706 ALU : S4; // any alu 4707 MEM : S3; // any mem 4708 %} 4709 4710 // Long ALU reg-mem operation 4711 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4712 instruction_count(2); 4713 dst : S5(write); 4714 mem : S3(read); 4715 D0 : S0(2); // big decoder only; twice 4716 ALU : S4(2); // any 2 alus 4717 MEM : S3(2); // both mems 4718 %} 4719 4720 // Integer mem operation (prefetch) 4721 pipe_class ialu_mem(memory mem) 4722 %{ 4723 single_instruction; 4724 mem : S3(read); 4725 D0 : S0; // big decoder only 4726 MEM : S3; // any mem 4727 %} 4728 4729 // Integer Store to Memory 4730 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4731 single_instruction; 4732 mem : S3(read); 4733 src : S5(read); 4734 D0 : S0; // big decoder only 4735 ALU : S4; // any alu 4736 MEM : S3; 4737 %} 4738 4739 // Long Store to Memory 4740 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4741 instruction_count(2); 4742 mem : S3(read); 4743 src : S5(read); 4744 D0 : S0(2); // big decoder only; twice 4745 ALU : S4(2); // any 2 alus 4746 MEM : S3(2); // Both mems 4747 %} 4748 4749 // Integer Store to Memory 4750 pipe_class ialu_mem_imm(memory mem) %{ 4751 single_instruction; 4752 mem : S3(read); 4753 D0 : S0; // big decoder only 4754 ALU : S4; // any alu 4755 MEM : S3; 4756 %} 4757 4758 // Integer ALU0 reg-reg operation 4759 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4760 single_instruction; 4761 dst : S4(write); 4762 src : S3(read); 4763 D0 : S0; // Big decoder only 4764 ALU0 : S3; // only alu0 4765 %} 4766 4767 // Integer ALU0 reg-mem operation 4768 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4769 single_instruction; 4770 dst : S5(write); 4771 mem : S3(read); 4772 D0 : S0; // big decoder only 4773 ALU0 : S4; // ALU0 only 4774 MEM : S3; // any mem 4775 %} 4776 4777 // Integer ALU reg-reg operation 4778 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
// --- continuation of an integer-ALU flags-producing pipe_class whose header
//     line lies above this chunk (reg-reg form; writes cr, reads two regs) ---
rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg (expands to a 4-instruction idiom)
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long (two 32-bit moves, hence two decoders)
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-mem-reg-reg operation (one memory source, so one big decoder)
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-reg operation
pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-mem operation (two memory operands)
pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float mem-mem operation (pure memory-to-memory move, no FPU resource)
pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

// Float mem-mem-mem operation
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

// Float mem-reg operation with a constant operand
pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR     : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr     : S1(read);
    BR     : S3;
%}

// Allocation idiom (serializing compare-exchange sequence)
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0     : S0(2);
    MEM    : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
  MachNop = empty;
%}

%}  // end of the pipeline description section opened above this chunk

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.
//               The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
// Reverse the bytes of a 32-bit int in place.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse the bytes of a 64-bit long held in a 32-bit register pair:
// byte-swap each half, then exchange the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Reverse the two low bytes (unsigned short): swap all four bytes, then
// shift the result down 16 with zero fill.  SHR clobbers flags.
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Reverse the two low bytes (signed short): as above but arithmetic shift
// so the result is sign-extended.  SAR clobbers flags.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

// Count leading zeros with the LZCNT instruction (guarded by CPU-feature flag).
instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback via BSR when LZCNT is unavailable.  BSR yields the index of the
// highest set bit (undefined and ZF=1 when src==0); the -1 fixup plus
// NEG/ADD converts that index into a leading-zero count (32 for src==0).
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);      // src was zero: pretend bit index -1
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);  // 31 - index = leading-zero count
  %}
  ins_pipe(ialu_reg);
%}

// Long variant with LZCNT: count in the high word first; LZCNT sets CF when
// its source is all zero, in which case we count the low word and add 32.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);   // CF set => high word was zero
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
  __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long fallback via BSR: try the high word; if zero, fall back to the low
// word with the same -1 fixup, then convert the bit index (-1..63 relative
// to the long) into a leading-zero count via NEG/ADD 63.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);      // index is relative to the full 64 bits
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);              // whole long was zero
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1); // 63 - index = leading-zero count
  %}
  ins_pipe(ialu_reg);
%}

// Count trailing zeros with the TZCNT instruction (guarded by CPU-feature flag).
instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback via BSF: BSF finds the lowest set bit (ZF=1, dst undefined when
// src==0), so patch in 32 for the all-zero case.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long variant with TZCNT: count in the low word first; TZCNT sets CF when
// its source is all zero, in which case we count the high word and add 32.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);   // CF set => low word was zero
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long fallback via BSF: scan the low word, then the high word; result is
// 64 (32+32) when the whole long is zero.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);      // high word also zero: 32 + 32 below
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

// Count set bits with POPCNT (guarded by CPU-feature flag).
instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// POPCNT directly from memory.
instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Population count of a long: sum the POPCNTs of the two 32-bit halves.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Population count of a long in memory: POPCNT the two 4-byte halves at
// $mem and $mem+4 and add.  Raw addresses are built by hand because the
// operand only describes the base form.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); // zero the high word
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 mask bits matter, since the loaded value is zero-extended.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// Matches the (x << 24) >> 24 narrowing idiom directly as a byte load.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// The 0xFF mask reduces the load to a single zero-extended byte load.
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 mask bits matter after the zero-extending load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // replicate the sign bit into the high word
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
// A 31-bit (non-negative) mask guarantees the result fits in the low word,
// so the high word can simply be zeroed.
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads; raw addresses built by hand for $mem and $mem+4.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
// Atomic volatile long load via the x87 FPU (no SSE2 available): a 64-bit
// FILD/FISTP pair is the only atomic 8-byte memory move on these CPUs.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load via an XMM temp (SSE2), result in a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load via an XMM temp (SSE2), result split into a
// 32-bit register pair: low half via MOVD, high half after a 32-bit PSRLQ.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmm$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range (array length)
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double onto the x87 stack, then pop into the allocated FPU register.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// As loadD, but for CPUs where clearing the upper XMM half is undesirable;
// format shows MOVLPD — presumably movdbl() selects MOVLPD (upper half
// preserved) when UseXmmLoadAndClearUpper is off; confirm in the assembler.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float via the x87 stack (no SSE at all).
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address — one instruct per supported addressing form.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero — XOR is shorter/cheaper than MOV 0, but kills flags.
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);               /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);               /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant as two 32-bit immediate moves.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load long zero — XOR both halves (kills flags).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// SSE load of a float constant from the constant table.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Float 0.0: XORPS the register with itself — cheaper than a table load.
// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// x87 double constant from the constant table.
// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Double 0.0 via FLDZ.
// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// Double 1.0 via FLD1.
// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// SSE2 load of a double constant from the constant table.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Double 0.0: XORPD is cheaper than a constant-table load.
// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load a long from a stack slot: two 32-bit loads (low half, high half).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot (float, x87)
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot (double, x87)
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

// No SSE and PREFETCHW not requested: emit nothing (zero-size encoding).
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
// xRegI restricts the source to a byte-addressable register.
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// The 0x66 operand-size prefix turns the 32-bit MOV into a 16-bit store.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Non-atomic two-instruction store; atomic (volatile) longs are handled
// by the store*_volatile rules below.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// Only the low word is stored; ConvL2I discards the high word.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: bounce the long through an XMM temp so the 64-bit store
// is a single (atomic) MOVSD.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As above, but the source is a GPR pair: assemble the 64-bit value in an
// XMM register (two MOVDs + PUNPCKLDQ), then store it atomically.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
// Gated by UseStoreImmI16 (16-bit immediate stores stall some CPUs).
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);               /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double
// regDPR1 pins the source to the x87 top-of-stack.
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// The FST m32real store itself narrows the double (ConvD2F folded away).
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

// Acquire is a no-op on x86; size(0) — this rule only constrains the
// scheduler, it emits no code.
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Release is likewise a no-op on x86.
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier; the assembler's membar(StoreLoad) emits a
// locked add to the stack on MP systems (format string mirrors that).
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elided when Matcher::post_store_load_barrier proves a preceding store
// already provides the required ordering.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// CastX2P is a reinterpretation, not a data move: constraining src and dst
// to the same register (EAX) makes it a zero-cost, empty encoding.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// Pre-CMOV CPUs: emulate with a short branch around a MOV; the branch
// condition is the inverse of the CMOV condition (cmpcode^1).
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned-compare variant of the CMOV emulation above.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Unordered-compare-flags variant: delegates to the unsigned rule.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
// x87 FCMOV moves into ST0 only (regDPR1 dst) and takes unsigned/fcmov
// condition codes (cmpOp_fcmov).
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: one CMOV per 32-bit half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00);         /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of +1: single-byte INC (0x40 + reg), gated by the UseIncDec flag.
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40);               /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: dst need not equal src0, flags untouched.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D);               /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D);               /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add of -1: single-byte DEC (0x48 + reg), gated by the UseIncDec flag.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48);               /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00);          /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add with a memory operand as the second input.
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src"
%}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Add register into memory ([mem] += reg); load-op-store matched as one
// instruction.
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate ([mem] += imm).
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// [mem] += 1, encoded as INC with the /0 ModRM extension.
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// [mem] += -1, encoded as DEC with the /1 ModRM extension.
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// Compiler-only type narrowing; emits no code (size(0), empty encoding).
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// Pointer cast marker; no code emitted.
instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// Integer cast marker; no code emitted, zero cost.
instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}


// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  // 0F B1 /r is CMPXCHG r/m32,r32; lock_prefix makes it atomic.
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  // CMPXCHG writes the memory value into EAX on failure, so oldval (EAX)
  // is clobbered.
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high order word of the new value to store but
    //       our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    // LOCK prefix only needed on multiprocessor machines.
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS: requires CMPXCHG8B support (supports_cx8).  oldval lives in
// EDX:EAX and newval in ECX:EBX as the instruction demands; res is set to
// 0/1 from the resulting ZF via enc_flags_ne_to_boolean.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS; oldval constrained to EAX as CMPXCHG requires.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// 32-bit integer CAS; same shape as compareAndSwapP.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI whose result is unused: a locked ADD suffices, no XADD and no
// result register needed.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add: LOCK XADD leaves the old memory value in newval.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange; XCHG with a memory operand is implicitly locked, so no
// explicit LOCK prefix (and no flags clobbered - note: no KILL cr).
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic pointer exchange, same encoding as xchgI.
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract immediate from register.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract a loaded value from a register (reg -= [mem]).
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Subtract register from memory ([mem] -= reg).
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
// Matched as AddP of a negated integer (0 - src).
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negate: 0 - dst matched as a one-instruction NEG.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  // Two-byte opcode 0F AF (IMUL r32,r/m32); OpcS/OpcP emit the bytes in
  // the required order.
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low half of EDX:EAX only; used as the
// constant operand of the mulI_imm_*high rules below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Only applies when the long multiplicand is a constant that fits in a
  // signed 32-bit value (so the widening multiply is really 32x32->64).
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned 32x32->64 multiply: operands are ints zero-extended by the
// 0xffffffff mask.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr)
%{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  // The CMP/JNE preamble special-cases min_jint / -1, which would
  // otherwise raise #DE (IDIV overflow); that case yields min_jint
  // with EDX zeroed.
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// Done by runtime call; arguments are pushed and SharedRuntime::ldiv
// is invoked (see long_div encoding).
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
// DivModI is a multi-output node (quotient in EAX, remainder in EDX),
// so it is matched without a Set.
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
// IDIV leaves the remainder in EDX; the quotient register EAX is killed.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// Runtime call to SharedRuntime::lrem, mirroring divL_eReg.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    // 64/32-bit division by a constant: divide by |imm| using two 32-bit
    // unsigned DIVs (hi then lo word), then fix the sign of the result.
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    // Same two-step unsigned division as divL_eReg_imm32, but the
    // remainder (left in EDX by DIV) is the result; its sign follows
    // the dividend's.
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
// Shift count must live in CL (eCXRegI) as the hardware requires.
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one, operating directly on memory.
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  // NOTE(review): pipeline class is ialu_mem_imm although this is a
  // register form - confirm whether ialu_reg was intended.
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Shift Left by 24, followed by Arithmetic Shift Right by 24: the
// (src << 24) >> 24 idiom, which sign-extends the low byte.  Used by the
// compiler for the i2b bytecode; collapsed to a single MOVSX.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by 16, followed by Arithmetic Shift Right by 16: sign-extends
// the low 16 bits.  This idiom is used by the compiler for the i2s bytecode.
// Convert int to short: matches (x << 16) >> 16 and emits a single MOVSX.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2, matched from (src1 ^ -1) & src2.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from (0 - src) & src.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to and including lowest set bit, matched from (src - 1) ^ src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matched from (src - 1) & src.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate: the two shift counts must sum to 0 mod 32 for this to be a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Predicate: the two shift counts must sum to 0 mod 32 for this to be a rotate.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32,
eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG/ADC carry trick: after the copy, dst becomes 0 if src == 0, else 1.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// dst = (p < q) ? -1 : 0, built from SETlt + NEG.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Address of the high 32 bits of the in-memory long operand.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst),
HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32 bits of the in-memory long operand.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32 bits of the in-memory long operand.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL
(AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32 bits of the in-memory long operand.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
          "\tSHL $dst.hi,$cnt-32\n"
          "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
          "\tSHR $dst.lo,$cnt-32\n"
          "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode(
shift_right_long( dst, shift ) ); 9190 ins_pipe( pipe_slow ); 9191 %} 9192 9193 // Shift Right Long by 1-31 9194 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9195 match(Set dst (RShiftL dst cnt)); 9196 effect(KILL cr); 9197 ins_cost(200); 9198 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9199 "SAR $dst.hi,$cnt" %} 9200 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9201 ins_encode( move_long_small_shift(dst,cnt) ); 9202 ins_pipe( ialu_reg_long ); 9203 %} 9204 9205 // Shift Right Long by 32-63 9206 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9207 match(Set dst (RShiftL dst cnt)); 9208 effect(KILL cr); 9209 ins_cost(300); 9210 format %{ "MOV $dst.lo,$dst.hi\n" 9211 "\tSAR $dst.lo,$cnt-32\n" 9212 "\tSAR $dst.hi,31" %} 9213 opcode(0xC1, 0x7); /* C1 /7 ib */ 9214 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9215 ins_pipe( ialu_reg_long ); 9216 %} 9217 9218 // Shift Right arithmetic Long by variable 9219 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9220 match(Set dst (RShiftL dst shift)); 9221 effect(KILL cr); 9222 ins_cost(600); 9223 size(18); 9224 format %{ "TEST $shift,32\n\t" 9225 "JEQ,s small\n\t" 9226 "MOV $dst.lo,$dst.hi\n\t" 9227 "SAR $dst.hi,31\n" 9228 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9229 "SAR $dst.hi,$shift" %} 9230 ins_encode( shift_right_arith_long( dst, shift ) ); 9231 ins_pipe( pipe_slow ); 9232 %} 9233 9234 9235 //----------Double Instructions------------------------------------------------ 9236 // Double Math 9237 9238 // Compare & branch 9239 9240 // P6 version of float compare, sets condition codes in EFLAGS 9241 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9242 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9243 match(Set cr (CmpD src1 src2)); 9244 effect(KILL rax); 9245 ins_cost(150); 9246 format %{ "FLD $src1\n\t" 9247 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9248 "JNP exit\n\t" 9249 "MOV ah,1 // saw a NaN, set CF\n\t" 9250 
"SAHF\n" 9251 "exit:\tNOP // avoid branch to branch" %} 9252 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9253 ins_encode( Push_Reg_DPR(src1), 9254 OpcP, RegOpc(src2), 9255 cmpF_P6_fixup ); 9256 ins_pipe( pipe_slow ); 9257 %} 9258 9259 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9260 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9261 match(Set cr (CmpD src1 src2)); 9262 ins_cost(150); 9263 format %{ "FLD $src1\n\t" 9264 "FUCOMIP ST,$src2 // P6 instruction" %} 9265 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9266 ins_encode( Push_Reg_DPR(src1), 9267 OpcP, RegOpc(src2)); 9268 ins_pipe( pipe_slow ); 9269 %} 9270 9271 // Compare & branch 9272 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9273 predicate(UseSSE<=1); 9274 match(Set cr (CmpD src1 src2)); 9275 effect(KILL rax); 9276 ins_cost(200); 9277 format %{ "FLD $src1\n\t" 9278 "FCOMp $src2\n\t" 9279 "FNSTSW AX\n\t" 9280 "TEST AX,0x400\n\t" 9281 "JZ,s flags\n\t" 9282 "MOV AH,1\t# unordered treat as LT\n" 9283 "flags:\tSAHF" %} 9284 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9285 ins_encode( Push_Reg_DPR(src1), 9286 OpcP, RegOpc(src2), 9287 fpu_flags); 9288 ins_pipe( pipe_slow ); 9289 %} 9290 9291 // Compare vs zero into -1,0,1 9292 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9293 predicate(UseSSE<=1); 9294 match(Set dst (CmpD3 src1 zero)); 9295 effect(KILL cr, KILL rax); 9296 ins_cost(280); 9297 format %{ "FTSTD $dst,$src1" %} 9298 opcode(0xE4, 0xD9); 9299 ins_encode( Push_Reg_DPR(src1), 9300 OpcS, OpcP, PopFPU, 9301 CmpF_Result(dst)); 9302 ins_pipe( pipe_slow ); 9303 %} 9304 9305 // Compare into -1,0,1 9306 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9307 predicate(UseSSE<=1); 9308 match(Set dst (CmpD3 src1 src2)); 9309 effect(KILL cr, KILL rax); 9310 ins_cost(300); 9311 format %{ "FCMPD $dst,$src1,$src2" %} 9312 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9313 ins_encode( 
Push_Reg_DPR(src1), 9314 OpcP, RegOpc(src2), 9315 CmpF_Result(dst)); 9316 ins_pipe( pipe_slow ); 9317 %} 9318 9319 // float compare and set condition codes in EFLAGS by XMM regs 9320 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9321 predicate(UseSSE>=2); 9322 match(Set cr (CmpD src1 src2)); 9323 ins_cost(145); 9324 format %{ "UCOMISD $src1,$src2\n\t" 9325 "JNP,s exit\n\t" 9326 "PUSHF\t# saw NaN, set CF\n\t" 9327 "AND [rsp], #0xffffff2b\n\t" 9328 "POPF\n" 9329 "exit:" %} 9330 ins_encode %{ 9331 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9332 emit_cmpfp_fixup(_masm); 9333 %} 9334 ins_pipe( pipe_slow ); 9335 %} 9336 9337 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9338 predicate(UseSSE>=2); 9339 match(Set cr (CmpD src1 src2)); 9340 ins_cost(100); 9341 format %{ "UCOMISD $src1,$src2" %} 9342 ins_encode %{ 9343 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9344 %} 9345 ins_pipe( pipe_slow ); 9346 %} 9347 9348 // float compare and set condition codes in EFLAGS by XMM regs 9349 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9350 predicate(UseSSE>=2); 9351 match(Set cr (CmpD src1 (LoadD src2))); 9352 ins_cost(145); 9353 format %{ "UCOMISD $src1,$src2\n\t" 9354 "JNP,s exit\n\t" 9355 "PUSHF\t# saw NaN, set CF\n\t" 9356 "AND [rsp], #0xffffff2b\n\t" 9357 "POPF\n" 9358 "exit:" %} 9359 ins_encode %{ 9360 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9361 emit_cmpfp_fixup(_masm); 9362 %} 9363 ins_pipe( pipe_slow ); 9364 %} 9365 9366 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9367 predicate(UseSSE>=2); 9368 match(Set cr (CmpD src1 (LoadD src2))); 9369 ins_cost(100); 9370 format %{ "UCOMISD $src1,$src2" %} 9371 ins_encode %{ 9372 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9373 %} 9374 ins_pipe( pipe_slow ); 9375 %} 9376 9377 // Compare into -1,0,1 in XMM 9378 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9379 predicate(UseSSE>=2); 9380 match(Set dst (CmpD3 src1 src2)); 
9381 effect(KILL cr); 9382 ins_cost(255); 9383 format %{ "UCOMISD $src1, $src2\n\t" 9384 "MOV $dst, #-1\n\t" 9385 "JP,s done\n\t" 9386 "JB,s done\n\t" 9387 "SETNE $dst\n\t" 9388 "MOVZB $dst, $dst\n" 9389 "done:" %} 9390 ins_encode %{ 9391 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9392 emit_cmpfp3(_masm, $dst$$Register); 9393 %} 9394 ins_pipe( pipe_slow ); 9395 %} 9396 9397 // Compare into -1,0,1 in XMM and memory 9398 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9399 predicate(UseSSE>=2); 9400 match(Set dst (CmpD3 src1 (LoadD src2))); 9401 effect(KILL cr); 9402 ins_cost(275); 9403 format %{ "UCOMISD $src1, $src2\n\t" 9404 "MOV $dst, #-1\n\t" 9405 "JP,s done\n\t" 9406 "JB,s done\n\t" 9407 "SETNE $dst\n\t" 9408 "MOVZB $dst, $dst\n" 9409 "done:" %} 9410 ins_encode %{ 9411 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9412 emit_cmpfp3(_masm, $dst$$Register); 9413 %} 9414 ins_pipe( pipe_slow ); 9415 %} 9416 9417 9418 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9419 predicate (UseSSE <=1); 9420 match(Set dst (SubD dst src)); 9421 9422 format %{ "FLD $src\n\t" 9423 "DSUBp $dst,ST" %} 9424 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9425 ins_cost(150); 9426 ins_encode( Push_Reg_DPR(src), 9427 OpcP, RegOpc(dst) ); 9428 ins_pipe( fpu_reg_reg ); 9429 %} 9430 9431 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9432 predicate (UseSSE <=1); 9433 match(Set dst (RoundDouble (SubD src1 src2))); 9434 ins_cost(250); 9435 9436 format %{ "FLD $src2\n\t" 9437 "DSUB ST,$src1\n\t" 9438 "FSTP_D $dst\t# D-round" %} 9439 opcode(0xD8, 0x5); 9440 ins_encode( Push_Reg_DPR(src2), 9441 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9442 ins_pipe( fpu_mem_reg_reg ); 9443 %} 9444 9445 9446 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9447 predicate (UseSSE <=1); 9448 match(Set dst (SubD dst (LoadD src))); 9449 ins_cost(150); 9450 9451 format %{ "FLD $src\n\t" 9452 "DSUBp $dst,ST" %} 9453 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9454 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9455 OpcP, RegOpc(dst) ); 9456 ins_pipe( fpu_reg_mem ); 9457 %} 9458 9459 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9460 predicate (UseSSE<=1); 9461 match(Set dst (AbsD src)); 9462 ins_cost(100); 9463 format %{ "FABS" %} 9464 opcode(0xE1, 0xD9); 9465 ins_encode( OpcS, OpcP ); 9466 ins_pipe( fpu_reg_reg ); 9467 %} 9468 9469 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9470 predicate(UseSSE<=1); 9471 match(Set dst (NegD src)); 9472 ins_cost(100); 9473 format %{ "FCHS" %} 9474 opcode(0xE0, 0xD9); 9475 ins_encode( OpcS, OpcP ); 9476 ins_pipe( fpu_reg_reg ); 9477 %} 9478 9479 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9480 predicate(UseSSE<=1); 9481 match(Set dst (AddD dst src)); 9482 format %{ "FLD $src\n\t" 9483 "DADD $dst,ST" %} 9484 size(4); 9485 ins_cost(150); 9486 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9487 ins_encode( Push_Reg_DPR(src), 9488 OpcP, RegOpc(dst) ); 9489 ins_pipe( fpu_reg_reg ); 9490 %} 9491 9492 9493 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9494 predicate(UseSSE<=1); 9495 match(Set dst (RoundDouble (AddD src1 src2))); 9496 ins_cost(250); 9497 9498 format %{ "FLD $src2\n\t" 9499 "DADD ST,$src1\n\t" 9500 "FSTP_D $dst\t# D-round" %} 9501 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9502 ins_encode( Push_Reg_DPR(src2), 9503 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9504 ins_pipe( fpu_mem_reg_reg ); 9505 %} 9506 9507 9508 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9509 predicate(UseSSE<=1); 9510 match(Set dst (AddD dst (LoadD src))); 9511 ins_cost(150); 9512 9513 format %{ "FLD $src\n\t" 9514 "DADDp $dst,ST" %} 9515 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9516 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9517 OpcP, RegOpc(dst) ); 9518 ins_pipe( fpu_reg_mem ); 9519 %} 9520 9521 // add-to-memory 9522 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9523 predicate(UseSSE<=1); 9524 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9525 ins_cost(150); 9526 9527 format %{ "FLD_D $dst\n\t" 9528 "DADD ST,$src\n\t" 9529 "FST_D $dst" %} 9530 opcode(0xDD, 0x0); 9531 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9532 Opcode(0xD8), RegOpc(src), 9533 set_instruction_start, 9534 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9535 ins_pipe( fpu_reg_mem ); 9536 %} 9537 9538 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9539 predicate(UseSSE<=1); 9540 match(Set dst (AddD dst con)); 9541 ins_cost(125); 9542 format %{ "FLD1\n\t" 9543 "DADDp $dst,ST" %} 9544 ins_encode %{ 9545 __ fld1(); 9546 __ faddp($dst$$reg); 9547 %} 9548 ins_pipe(fpu_reg); 9549 %} 9550 9551 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9552 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9553 match(Set dst (AddD dst con)); 9554 ins_cost(200); 9555 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9556 "DADDp $dst,ST" %} 9557 ins_encode %{ 9558 __ fld_d($constantaddress($con)); 9559 __ faddp($dst$$reg); 9560 %} 9561 ins_pipe(fpu_reg_mem); 9562 %} 9563 9564 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9565 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9566 match(Set dst (RoundDouble (AddD src con))); 9567 ins_cost(200); 9568 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9569 "DADD ST,$src\n\t" 9570 "FSTP_D $dst\t# D-round" %} 9571 ins_encode %{ 9572 __ fld_d($constantaddress($con)); 9573 __ fadd($src$$reg); 9574 __ fstp_d(Address(rsp, $dst$$disp)); 9575 %} 9576 ins_pipe(fpu_mem_reg_con); 9577 %} 9578 9579 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9580 predicate(UseSSE<=1); 9581 match(Set dst (MulD dst src)); 9582 format %{ "FLD $src\n\t" 9583 "DMULp $dst,ST" %} 9584 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9585 ins_cost(150); 9586 ins_encode( Push_Reg_DPR(src), 9587 OpcP, RegOpc(dst) ); 9588 ins_pipe( 
fpu_reg_reg ); 9589 %} 9590 9591 // Strict FP instruction biases argument before multiply then 9592 // biases result to avoid double rounding of subnormals. 9593 // 9594 // scale arg1 by multiplying arg1 by 2^(-15360) 9595 // load arg2 9596 // multiply scaled arg1 by arg2 9597 // rescale product by 2^(15360) 9598 // 9599 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9600 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9601 match(Set dst (MulD dst src)); 9602 ins_cost(1); // Select this instruction for all strict FP double multiplies 9603 9604 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9605 "DMULp $dst,ST\n\t" 9606 "FLD $src\n\t" 9607 "DMULp $dst,ST\n\t" 9608 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9609 "DMULp $dst,ST\n\t" %} 9610 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9611 ins_encode( strictfp_bias1(dst), 9612 Push_Reg_DPR(src), 9613 OpcP, RegOpc(dst), 9614 strictfp_bias2(dst) ); 9615 ins_pipe( fpu_reg_reg ); 9616 %} 9617 9618 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9619 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9620 match(Set dst (MulD dst con)); 9621 ins_cost(200); 9622 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9623 "DMULp $dst,ST" %} 9624 ins_encode %{ 9625 __ fld_d($constantaddress($con)); 9626 __ fmulp($dst$$reg); 9627 %} 9628 ins_pipe(fpu_reg_mem); 9629 %} 9630 9631 9632 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9633 predicate( UseSSE<=1 ); 9634 match(Set dst (MulD dst (LoadD src))); 9635 ins_cost(200); 9636 format %{ "FLD_D $src\n\t" 9637 "DMULp $dst,ST" %} 9638 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9639 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9640 OpcP, RegOpc(dst) ); 9641 ins_pipe( fpu_reg_mem ); 9642 %} 9643 9644 // 9645 // Cisc-alternate to reg-reg multiply 9646 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9647 predicate( UseSSE<=1 ); 9648 match(Set dst (MulD src (LoadD mem))); 9649 ins_cost(250); 9650 format %{ "FLD_D $mem\n\t" 9651 "DMUL ST,$src\n\t" 9652 "FSTP_D $dst" %} 9653 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9654 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9655 OpcReg_FPR(src), 9656 Pop_Reg_DPR(dst) ); 9657 ins_pipe( fpu_reg_reg_mem ); 9658 %} 9659 9660 9661 // MACRO3 -- addDPR a mulDPR 9662 // This instruction is a '2-address' instruction in that the result goes 9663 // back to src2. This eliminates a move from the macro; possibly the 9664 // register allocator will have to add it back (and maybe not). 9665 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9666 predicate( UseSSE<=1 ); 9667 match(Set src2 (AddD (MulD src0 src1) src2)); 9668 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9669 "DMUL ST,$src1\n\t" 9670 "DADDp $src2,ST" %} 9671 ins_cost(250); 9672 opcode(0xDD); /* LoadD DD /0 */ 9673 ins_encode( Push_Reg_FPR(src0), 9674 FMul_ST_reg(src1), 9675 FAddP_reg_ST(src2) ); 9676 ins_pipe( fpu_reg_reg_reg ); 9677 %} 9678 9679 9680 // MACRO3 -- subDPR a mulDPR 9681 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9682 predicate( UseSSE<=1 ); 9683 match(Set src2 (SubD (MulD src0 src1) src2)); 9684 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9685 "DMUL ST,$src1\n\t" 9686 "DSUBRp $src2,ST" %} 9687 ins_cost(250); 9688 ins_encode( Push_Reg_FPR(src0), 9689 FMul_ST_reg(src1), 9690 Opcode(0xDE), Opc_plus(0xE0,src2)); 9691 ins_pipe( fpu_reg_reg_reg ); 9692 %} 9693 9694 9695 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9696 predicate( UseSSE<=1 ); 9697 match(Set dst (DivD dst src)); 9698 9699 format %{ "FLD $src\n\t" 9700 "FDIVp $dst,ST" %} 9701 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9702 ins_cost(150); 9703 ins_encode( Push_Reg_DPR(src), 9704 OpcP, RegOpc(dst) ); 9705 ins_pipe( fpu_reg_reg ); 9706 %} 9707 9708 // Strict FP instruction biases argument before division then 9709 // biases 
result, to avoid double rounding of subnormals. 9710 // 9711 // scale dividend by multiplying dividend by 2^(-15360) 9712 // load divisor 9713 // divide scaled dividend by divisor 9714 // rescale quotient by 2^(15360) 9715 // 9716 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9717 predicate (UseSSE<=1); 9718 match(Set dst (DivD dst src)); 9719 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9720 ins_cost(01); 9721 9722 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9723 "DMULp $dst,ST\n\t" 9724 "FLD $src\n\t" 9725 "FDIVp $dst,ST\n\t" 9726 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9727 "DMULp $dst,ST\n\t" %} 9728 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9729 ins_encode( strictfp_bias1(dst), 9730 Push_Reg_DPR(src), 9731 OpcP, RegOpc(dst), 9732 strictfp_bias2(dst) ); 9733 ins_pipe( fpu_reg_reg ); 9734 %} 9735 9736 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9737 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 9738 match(Set dst (RoundDouble (DivD src1 src2))); 9739 9740 format %{ "FLD $src1\n\t" 9741 "FDIV ST,$src2\n\t" 9742 "FSTP_D $dst\t# D-round" %} 9743 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 9744 ins_encode( Push_Reg_DPR(src1), 9745 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); 9746 ins_pipe( fpu_mem_reg_reg ); 9747 %} 9748 9749 9750 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 9751 predicate(UseSSE<=1); 9752 match(Set dst (ModD dst src)); 9753 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 9754 9755 format %{ "DMOD $dst,$src" %} 9756 ins_cost(250); 9757 ins_encode(Push_Reg_Mod_DPR(dst, src), 9758 emitModDPR(), 9759 Push_Result_Mod_DPR(src), 9760 Pop_Reg_DPR(dst)); 9761 ins_pipe( pipe_slow ); 9762 %} 9763 9764 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 9765 predicate(UseSSE>=2); 9766 match(Set dst (ModD src0 src1)); 
9767 effect(KILL rax, KILL cr); 9768 9769 format %{ "SUB ESP,8\t # DMOD\n" 9770 "\tMOVSD [ESP+0],$src1\n" 9771 "\tFLD_D [ESP+0]\n" 9772 "\tMOVSD [ESP+0],$src0\n" 9773 "\tFLD_D [ESP+0]\n" 9774 "loop:\tFPREM\n" 9775 "\tFWAIT\n" 9776 "\tFNSTSW AX\n" 9777 "\tSAHF\n" 9778 "\tJP loop\n" 9779 "\tFSTP_D [ESP+0]\n" 9780 "\tMOVSD $dst,[ESP+0]\n" 9781 "\tADD ESP,8\n" 9782 "\tFSTP ST0\t # Restore FPU Stack" 9783 %} 9784 ins_cost(250); 9785 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9786 ins_pipe( pipe_slow ); 9787 %} 9788 9789 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ 9790 predicate (UseSSE<=1); 9791 match(Set dst(TanD src)); 9792 format %{ "DTAN $dst" %} 9793 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan 9794 Opcode(0xDD), Opcode(0xD8)); // fstp st 9795 ins_pipe( pipe_slow ); 9796 %} 9797 9798 instruct tanD_reg(regD dst, eFlagsReg cr) %{ 9799 predicate (UseSSE>=2); 9800 match(Set dst(TanD dst)); 9801 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9802 format %{ "DTAN $dst" %} 9803 ins_encode( Push_SrcD(dst), 9804 Opcode(0xD9), Opcode(0xF2), // fptan 9805 Opcode(0xDD), Opcode(0xD8), // fstp st 9806 Push_ResultD(dst) ); 9807 ins_pipe( pipe_slow ); 9808 %} 9809 9810 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9811 predicate (UseSSE<=1); 9812 match(Set dst(AtanD dst src)); 9813 format %{ "DATA $dst,$src" %} 9814 opcode(0xD9, 0xF3); 9815 ins_encode( Push_Reg_DPR(src), 9816 OpcP, OpcS, RegOpc(dst) ); 9817 ins_pipe( pipe_slow ); 9818 %} 9819 9820 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9821 predicate (UseSSE>=2); 9822 match(Set dst(AtanD dst src)); 9823 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9824 format %{ "DATA $dst,$src" %} 9825 opcode(0xD9, 0xF3); 9826 ins_encode( Push_SrcD(src), 9827 OpcP, OpcS, Push_ResultD(dst) ); 9828 ins_pipe( pipe_slow ); 9829 %} 9830 9831 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9832 predicate (UseSSE<=1); 9833 match(Set dst (SqrtD 
src)); 9834 format %{ "DSQRT $dst,$src" %} 9835 opcode(0xFA, 0xD9); 9836 ins_encode( Push_Reg_DPR(src), 9837 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9838 ins_pipe( pipe_slow ); 9839 %} 9840 9841 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9842 predicate (UseSSE<=1); 9843 match(Set Y (PowD X Y)); // Raise X to the Yth power 9844 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9845 format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %} 9846 ins_encode %{ 9847 __ subptr(rsp, 8); 9848 __ fld_s($X$$reg - 1); 9849 __ fast_pow(); 9850 __ addptr(rsp, 8); 9851 %} 9852 ins_pipe( pipe_slow ); 9853 %} 9854 9855 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9856 predicate (UseSSE>=2); 9857 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 9858 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9859 format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %} 9860 ins_encode %{ 9861 __ subptr(rsp, 8); 9862 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 9863 __ fld_d(Address(rsp, 0)); 9864 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 9865 __ fld_d(Address(rsp, 0)); 9866 __ fast_pow(); 9867 __ fstp_d(Address(rsp, 0)); 9868 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 9869 __ addptr(rsp, 8); 9870 %} 9871 ins_pipe( pipe_slow ); 9872 %} 9873 9874 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ 9875 predicate (UseSSE<=1); 9876 // The source Double operand on FPU stack 9877 match(Set dst (Log10D src)); 9878 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9879 // fxch ; swap ST(0) with ST(1) 9880 // fyl2x ; compute log_10(2) * log_2(x) 9881 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9882 "FXCH \n\t" 9883 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9884 %} 9885 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9886 Opcode(0xD9), Opcode(0xC9), // fxch 9887 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9888 9889 ins_pipe( pipe_slow ); 9890 %} 9891 9892 instruct 
log10D_reg(regD dst, regD src, eFlagsReg cr) %{ 9893 predicate (UseSSE>=2); 9894 effect(KILL cr); 9895 match(Set dst (Log10D src)); 9896 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9897 // fyl2x ; compute log_10(2) * log_2(x) 9898 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9899 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9900 %} 9901 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9902 Push_SrcD(src), 9903 Opcode(0xD9), Opcode(0xF1), // fyl2x 9904 Push_ResultD(dst)); 9905 9906 ins_pipe( pipe_slow ); 9907 %} 9908 9909 //-------------Float Instructions------------------------------- 9910 // Float Math 9911 9912 // Code for float compare: 9913 // fcompp(); 9914 // fwait(); fnstsw_ax(); 9915 // sahf(); 9916 // movl(dst, unordered_result); 9917 // jcc(Assembler::parity, exit); 9918 // movl(dst, less_result); 9919 // jcc(Assembler::below, exit); 9920 // movl(dst, equal_result); 9921 // jcc(Assembler::equal, exit); 9922 // movl(dst, greater_result); 9923 // exit: 9924 9925 // P6 version of float compare, sets condition codes in EFLAGS 9926 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9927 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9928 match(Set cr (CmpF src1 src2)); 9929 effect(KILL rax); 9930 ins_cost(150); 9931 format %{ "FLD $src1\n\t" 9932 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9933 "JNP exit\n\t" 9934 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 9935 "SAHF\n" 9936 "exit:\tNOP // avoid branch to branch" %} 9937 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9938 ins_encode( Push_Reg_DPR(src1), 9939 OpcP, RegOpc(src2), 9940 cmpF_P6_fixup ); 9941 ins_pipe( pipe_slow ); 9942 %} 9943 9944 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 9945 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9946 match(Set cr (CmpF src1 src2)); 9947 ins_cost(100); 9948 format %{ "FLD $src1\n\t" 9949 "FUCOMIP ST,$src2 // P6 instruction" %} 9950 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9951 ins_encode( 
Push_Reg_DPR(src1), 9952 OpcP, RegOpc(src2)); 9953 ins_pipe( pipe_slow ); 9954 %} 9955 9956 9957 // Compare & branch 9958 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9959 predicate(UseSSE == 0); 9960 match(Set cr (CmpF src1 src2)); 9961 effect(KILL rax); 9962 ins_cost(200); 9963 format %{ "FLD $src1\n\t" 9964 "FCOMp $src2\n\t" 9965 "FNSTSW AX\n\t" 9966 "TEST AX,0x400\n\t" 9967 "JZ,s flags\n\t" 9968 "MOV AH,1\t# unordered treat as LT\n" 9969 "flags:\tSAHF" %} 9970 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9971 ins_encode( Push_Reg_DPR(src1), 9972 OpcP, RegOpc(src2), 9973 fpu_flags); 9974 ins_pipe( pipe_slow ); 9975 %} 9976 9977 // Compare vs zero into -1,0,1 9978 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9979 predicate(UseSSE == 0); 9980 match(Set dst (CmpF3 src1 zero)); 9981 effect(KILL cr, KILL rax); 9982 ins_cost(280); 9983 format %{ "FTSTF $dst,$src1" %} 9984 opcode(0xE4, 0xD9); 9985 ins_encode( Push_Reg_DPR(src1), 9986 OpcS, OpcP, PopFPU, 9987 CmpF_Result(dst)); 9988 ins_pipe( pipe_slow ); 9989 %} 9990 9991 // Compare into -1,0,1 9992 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 9993 predicate(UseSSE == 0); 9994 match(Set dst (CmpF3 src1 src2)); 9995 effect(KILL cr, KILL rax); 9996 ins_cost(300); 9997 format %{ "FCMPF $dst,$src1,$src2" %} 9998 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9999 ins_encode( Push_Reg_DPR(src1), 10000 OpcP, RegOpc(src2), 10001 CmpF_Result(dst)); 10002 ins_pipe( pipe_slow ); 10003 %} 10004 10005 // float compare and set condition codes in EFLAGS by XMM regs 10006 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10007 predicate(UseSSE>=1); 10008 match(Set cr (CmpF src1 src2)); 10009 ins_cost(145); 10010 format %{ "UCOMISS $src1,$src2\n\t" 10011 "JNP,s exit\n\t" 10012 "PUSHF\t# saw NaN, set CF\n\t" 10013 "AND [rsp], #0xffffff2b\n\t" 10014 "POPF\n" 10015 "exit:" %} 10016 ins_encode %{ 10017 __ 
ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10018 emit_cmpfp_fixup(_masm); 10019 %} 10020 ins_pipe( pipe_slow ); 10021 %} 10022 10023 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10024 predicate(UseSSE>=1); 10025 match(Set cr (CmpF src1 src2)); 10026 ins_cost(100); 10027 format %{ "UCOMISS $src1,$src2" %} 10028 ins_encode %{ 10029 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10030 %} 10031 ins_pipe( pipe_slow ); 10032 %} 10033 10034 // float compare and set condition codes in EFLAGS by XMM regs 10035 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10036 predicate(UseSSE>=1); 10037 match(Set cr (CmpF src1 (LoadF src2))); 10038 ins_cost(165); 10039 format %{ "UCOMISS $src1,$src2\n\t" 10040 "JNP,s exit\n\t" 10041 "PUSHF\t# saw NaN, set CF\n\t" 10042 "AND [rsp], #0xffffff2b\n\t" 10043 "POPF\n" 10044 "exit:" %} 10045 ins_encode %{ 10046 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10047 emit_cmpfp_fixup(_masm); 10048 %} 10049 ins_pipe( pipe_slow ); 10050 %} 10051 10052 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10053 predicate(UseSSE>=1); 10054 match(Set cr (CmpF src1 (LoadF src2))); 10055 ins_cost(100); 10056 format %{ "UCOMISS $src1,$src2" %} 10057 ins_encode %{ 10058 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10059 %} 10060 ins_pipe( pipe_slow ); 10061 %} 10062 10063 // Compare into -1,0,1 in XMM 10064 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10065 predicate(UseSSE>=1); 10066 match(Set dst (CmpF3 src1 src2)); 10067 effect(KILL cr); 10068 ins_cost(255); 10069 format %{ "UCOMISS $src1, $src2\n\t" 10070 "MOV $dst, #-1\n\t" 10071 "JP,s done\n\t" 10072 "JB,s done\n\t" 10073 "SETNE $dst\n\t" 10074 "MOVZB $dst, $dst\n" 10075 "done:" %} 10076 ins_encode %{ 10077 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10078 emit_cmpfp3(_masm, $dst$$Register); 10079 %} 10080 ins_pipe( pipe_slow ); 10081 %} 10082 10083 // Compare into -1,0,1 in XMM and memory 10084 instruct 
// NOTE(review): continuation of cmpF_regmem — the "instruct" header precedes this chunk.
cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

//----------x87 (FPU-stack) float arithmetic, matched only when UseSSE==0------
// The *FPR24* variants have a stackSlotF destination: spilling the x87
// extended-precision result through a 32-bit store forces rounding to
// single (24-bit mantissa) precision, as required in 24-bit mode
// (predicate select_24_bit_instr()).  The plain *FPR variants keep the
// result in an FPU register without that rounding step.

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src)
%{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// FABS/FCHS take no explicit operand (they act on the FPU top of stack),
// hence the regFPR1 operand class pinning dst/src to the same TOS register.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// ModF with SSE operands: the operands are bounced through the stack onto the
// x87 stack because FPREM has no XMM equivalent.  FPREM computes a *partial*
// remainder; the JP loop re-executes it until the status word (copied to the
// CPU flags via FNSTSW/SAHF) reports that the reduction is complete.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!

instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is not already at the top of the FPU stack, load it there first;
    // FST_S works only on ST(0).
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// 0x80000000 is the value the conversion instruction stores on overflow or
// NaN (the x87/SSE2 "integer indefinite"); matching it routes the corner
// cases to the d2i_wrapper stub, which produces the Java-specified result.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Possible overflow/NaN: pass the value to the wrapper on the FPU stack.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 (EDX==0x80000000, EAX==0) is the "long indefinite"
    // stored on overflow/NaN; only then take the wrapper slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Possible overflow/NaN; d2i_wrapper handles the float case too.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// Predicate: matches only ConvI2F of (x & 255), i.e. an unsigned byte, which
// is always exactly representable as a float — hence no rounding spill.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend: copy src into both halves, then SAR the high half by 31
// to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// NOTE(review): no predicate here — appears to serve as the generic/x87
// fallback for ConvL2F when the SSE form above does not apply; confirm
// against adlc's cost-based instruction selection.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long to int: just take the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

//----------Bit-preserving moves between int/long and float/double------------

instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    // Shuffle immediate 0x4e swaps the two 32-bit halves of the low quadword
    // so MOVD can then extract the high half of the double.
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// NOTE(review): definition continues beyond this chunk.
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 &&
UseXmmLoadAndClearUpper); 11325 match(Set dst (MoveL2D src)); 11326 effect(DEF dst, USE src); 11327 11328 ins_cost(95); 11329 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11330 ins_encode %{ 11331 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11332 %} 11333 ins_pipe( pipe_slow ); 11334 %} 11335 11336 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11337 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11338 match(Set dst (MoveL2D src)); 11339 effect(DEF dst, USE src); 11340 11341 ins_cost(95); 11342 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11343 ins_encode %{ 11344 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11345 %} 11346 ins_pipe( pipe_slow ); 11347 %} 11348 11349 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11350 predicate(UseSSE>=2); 11351 match(Set dst (MoveL2D src)); 11352 effect(TEMP dst, USE src, TEMP tmp); 11353 ins_cost(85); 11354 format %{ "MOVD $dst,$src.lo\n\t" 11355 "MOVD $tmp,$src.hi\n\t" 11356 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11357 ins_encode %{ 11358 __ movdl($dst$$XMMRegister, $src$$Register); 11359 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11360 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11361 %} 11362 ins_pipe( pipe_slow ); 11363 %} 11364 11365 11366 // ======================================================================= 11367 // fast clearing of an array 11368 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11369 predicate(!UseFastStosb); 11370 match(Set dummy (ClearArray cnt base)); 11371 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11372 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11373 "SHL ECX,1\t# Convert doublewords to words\n\t" 11374 "REP STOS\t# store EAX into [EDI++] while ECX--" %} 11375 ins_encode %{ 11376 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11377 %} 11378 ins_pipe( pipe_slow ); 11379 %} 11380 11381 instruct 
rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11382 predicate(UseFastStosb); 11383 match(Set dummy (ClearArray cnt base)); 11384 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11385 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11386 "SHL ECX,3\t# Convert doublewords to bytes\n\t" 11387 "REP STOSB\t# store EAX into [EDI++] while ECX--" %} 11388 ins_encode %{ 11389 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11390 %} 11391 ins_pipe( pipe_slow ); 11392 %} 11393 11394 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11395 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11396 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11397 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11398 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11399 11400 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11401 ins_encode %{ 11402 __ string_compare($str1$$Register, $str2$$Register, 11403 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11404 $tmp1$$XMMRegister, StrIntrinsicNode::LL); 11405 %} 11406 ins_pipe( pipe_slow ); 11407 %} 11408 11409 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11410 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11411 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11412 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11413 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11414 11415 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11416 ins_encode %{ 11417 __ string_compare($str1$$Register, $str2$$Register, 11418 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11419 $tmp1$$XMMRegister, StrIntrinsicNode::UU); 11420 %} 11421 ins_pipe( pipe_slow ); 11422 %} 11423 11424 instruct string_compareLU(eDIRegP 
str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11425 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11426 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11427 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11428 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11429 11430 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11431 ins_encode %{ 11432 __ string_compare($str1$$Register, $str2$$Register, 11433 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11434 $tmp1$$XMMRegister, StrIntrinsicNode::LU); 11435 %} 11436 ins_pipe( pipe_slow ); 11437 %} 11438 11439 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11440 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11441 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); 11442 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11443 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11444 11445 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11446 ins_encode %{ 11447 __ string_compare($str2$$Register, $str1$$Register, 11448 $cnt2$$Register, $cnt1$$Register, $result$$Register, 11449 $tmp1$$XMMRegister, StrIntrinsicNode::UL); 11450 %} 11451 ins_pipe( pipe_slow ); 11452 %} 11453 11454 // fast string equals 11455 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11456 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11457 match(Set result (StrEquals (Binary str1 str2) cnt)); 11458 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11459 11460 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11461 ins_encode %{ 11462 __ arrays_equals(false, $str1$$Register, $str2$$Register, 11463 $cnt$$Register, $result$$Register, $tmp3$$Register, 11464 $tmp1$$XMMRegister, 
$tmp2$$XMMRegister, false /* char */); 11465 %} 11466 11467 ins_pipe( pipe_slow ); 11468 %} 11469 11470 // fast search of substring with known size. 11471 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11472 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11473 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11474 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11475 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11476 11477 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11478 ins_encode %{ 11479 int icnt2 = (int)$int_cnt2$$constant; 11480 if (icnt2 >= 16) { 11481 // IndexOf for constant substrings with size >= 16 elements 11482 // which don't need to be loaded through stack. 11483 __ string_indexofC8($str1$$Register, $str2$$Register, 11484 $cnt1$$Register, $cnt2$$Register, 11485 icnt2, $result$$Register, 11486 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11487 } else { 11488 // Small strings are loaded through stack if they cross page boundary. 11489 __ string_indexof($str1$$Register, $str2$$Register, 11490 $cnt1$$Register, $cnt2$$Register, 11491 icnt2, $result$$Register, 11492 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11493 } 11494 %} 11495 ins_pipe( pipe_slow ); 11496 %} 11497 11498 // fast search of substring with known size. 
// UU variant (char[] in char[]); constant substring length.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// UL variant (char[] pattern in byte[]); constant substring length.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// General-length (non-constant substring) IndexOf variants: (-1) is
// passed as the constant count to select the generic path.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Search for a single char in a char[] (SSE4.2).
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// Test whether a byte[] contains any byte with the sign bit set.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);               /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);          /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);               /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST reg,reg (shorter than CMP reg,0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fold (src & con) == 0 into a single TEST with immediate.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fold (src & mem) == 0 into a single TEST with a memory operand.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);               /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);          /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);               /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare against zero via TEST reg,reg.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);               /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);          /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);               /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);               /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);               /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
11943 // // Conditional move for max 11944 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11945 // effect( USE_DEF op2, USE op1, USE cr ); 11946 // format %{ "CMOVgt $op2,$op1\t! max" %} 11947 // opcode(0x4F,0x0F); 11948 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11949 // ins_pipe( pipe_cmov_reg ); 11950 //%} 11951 // 11952 // // Max Register with Register (P6 version) 11953 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11954 // predicate(VM_Version::supports_cmov() ); 11955 // match(Set op2 (MaxI op1 op2)); 11956 // ins_cost(200); 11957 // expand %{ 11958 // eFlagsReg cr; 11959 // compI_eReg(cr,op1,op2); 11960 // cmovI_reg_gt(op2,op1,cr); 11961 // %} 11962 //%} 11963 11964 // Max Register with Register (generic version) 11965 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11966 match(Set dst (MaxI dst src)); 11967 effect(KILL flags); 11968 ins_cost(300); 11969 11970 format %{ "MAX $dst,$src" %} 11971 opcode(0xCC); 11972 ins_encode( max_enc(dst,src) ); 11973 ins_pipe( pipe_slow ); 11974 %} 11975 11976 // ============================================================================ 11977 // Counted Loop limit node which represents exact final iterator value. 11978 // Note: the resulting value should fit into integer range since 11979 // counted loops have limit check on overflow. 11980 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ 11981 match(Set limit (LoopLimit (Binary init limit) stride)); 11982 effect(TEMP limit_hi, TEMP tmp, KILL flags); 11983 ins_cost(300); 11984 11985 format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} 11986 ins_encode %{ 11987 int strd = (int)$stride$$constant; 11988 assert(strd != 1 && strd != -1, "sanity"); 11989 int m1 = (strd > 0) ? 
1 : -1; 11990 // Convert limit to long (EAX:EDX) 11991 __ cdql(); 11992 // Convert init to long (init:tmp) 11993 __ movl($tmp$$Register, $init$$Register); 11994 __ sarl($tmp$$Register, 31); 11995 // $limit - $init 11996 __ subl($limit$$Register, $init$$Register); 11997 __ sbbl($limit_hi$$Register, $tmp$$Register); 11998 // + ($stride - 1) 11999 if (strd > 0) { 12000 __ addl($limit$$Register, (strd - 1)); 12001 __ adcl($limit_hi$$Register, 0); 12002 __ movl($tmp$$Register, strd); 12003 } else { 12004 __ addl($limit$$Register, (strd + 1)); 12005 __ adcl($limit_hi$$Register, -1); 12006 __ lneg($limit_hi$$Register, $limit$$Register); 12007 __ movl($tmp$$Register, -strd); 12008 } 12009 // signed devision: (EAX:EDX) / pos_stride 12010 __ idivl($tmp$$Register); 12011 if (strd < 0) { 12012 // restore sign 12013 __ negl($tmp$$Register); 12014 } 12015 // (EAX) * stride 12016 __ mull($tmp$$Register); 12017 // + init (ignore upper bits) 12018 __ addl($limit$$Register, $init$$Register); 12019 %} 12020 ins_pipe( pipe_slow ); 12021 %} 12022 12023 // ============================================================================ 12024 // Branch Instructions 12025 // Jump Table 12026 instruct jumpXtnd(rRegI switch_val) %{ 12027 match(Jump switch_val); 12028 ins_cost(350); 12029 format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} 12030 ins_encode %{ 12031 // Jump to Address(table_base + switch_reg) 12032 Address index(noreg, $switch_val$$Register, Address::times_1); 12033 __ jump(ArrayAddress($constantaddress, index)); 12034 %} 12035 ins_pipe(pipe_jmp); 12036 %} 12037 12038 // Jump Direct - Label defines a relative address from JMP+1 12039 instruct jmpDir(label labl) %{ 12040 match(Goto); 12041 effect(USE labl); 12042 12043 ins_cost(300); 12044 format %{ "JMP $labl" %} 12045 size(5); 12046 ins_encode %{ 12047 Label* L = $labl$$label; 12048 __ jmp(*L, false); // Always long jump 12049 %} 12050 ins_pipe( pipe_jmp ); 12051 %} 12052 12053 // Jump Direct Conditional - Label defines a 
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Taken when the condition encoded by the cmpOp operand ($cop$$cmpcode)
// holds for the integer condition codes in $cr.
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);                      // Jcc rel32: 0F 8x + 4-byte displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Counted-loop back-branch variant (signed condition codes).
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Counted-loop back-branch, unsigned comparison.
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Counted-loop back-branch on the unordered-compare flags register.
// Lower cost than jmpLoopEndU so it is preferred when it applies.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional jump on unordered-compare flags; cheaper than jmpConU.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Two-branch form for conditions that must also account for the parity
// flag (unordered result): notEqual branches on parity OR notEqual;
// equal first skips over the branch when parity is set.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u   $labl\n\t"
      $$emit$$"J$cop,u   $labl"
    } else {
      $$emit$$"JP,u   done\n\t"
      $$emit$$"J$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // NE: unordered (PF set) also counts as not-equal.
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // EQ: unordered must NOT take the branch, so hop over it.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
// array for an instance of the superklass.  Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()).  Return
// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
// Slow-path secondary-supers scan.  Fixed register usage (EDI/ESI/EAX/ECX)
// is dictated by the REPNE SCASD encoding in enc_PartialSubtypeCheck.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Same scan, but the consumer only needs the flags (compare against NULL),
// so the XOR of the result register is skipped (opcode 0x0) and EDI is
// merely killed.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset.  These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching.  Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);                      // EB cb: opcode + 1-byte displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short version of jmpConUCF2: parity handling folded in, two short jumps.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);                      // two 2-byte short jumps
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers.  Comparing such values efficiently
// is tricky.  The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code.  The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf.  One fix is to not match both
// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in $dst from a full 64-bit signed compare of two
// register pairs: compare high words first (signed), then low words
// (unsigned), branching to set the result.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);        // result = 0
    // High halves decide unless equal (signed compare).
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // High halves equal: low halves compare unsigned.
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);                    // result = +1
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);                    // result = -1
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Against zero only the sign of the high word matters, so a TEST of the
// high half suffices.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Reg-reg long compare for LT/GE: CMP of the low halves followed by SBB of
// the high halves leaves the sign/overflow flags set as if the full 64-bit
// subtraction had been done.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 variant, UseSSE<=1 only).
// NOTE: the BoolTest alternation is parenthesized so that the UseSSE guard
// applies to BOTH tests; without the parentheses '&&' binds tighter than
// '||' and the 'ge' case would match regardless of the UseSSE setting.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 variant, UseSSE>=2 only).
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 variant, UseSSE==0 only).
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE variant, UseSSE>=1 only).
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// A long is zero iff the OR of its two halves is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Compare low halves; only if they are equal does the high-half compare run.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// CMOVE a long pair on the EQ/NE long-compare flags.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 variant, UseSSE<=1 only).
// NOTE: the BoolTest alternation is parenthesized so that the UseSSE guard
// applies to BOTH tests; without the parentheses '&&' binds tighter than
// '||' and the 'ne' case would match regardless of the UseSSE setting.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 variant, UseSSE>=2 only).
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 variant, UseSSE==0 only).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE variant, UseSSE>=1 only).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
// Uses the commuted cmpOp because the flags were set with swapped operands.
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 variant, UseSSE<=1 only).
// NOTE: the BoolTest alternation is parenthesized so that the UseSSE guard
// applies to BOTH tests; without the parentheses '&&' binds tighter than
// '||' and the 'gt' case would match regardless of the UseSSE setting.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 variant, UseSSE>=2 only).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 variant, UseSSE==0 only).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE variant, UseSSE>=1 only).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // EAX is pre-loaded with a placeholder oop (-1); presumably patched by the
  // inline-cache machinery in Java_Dynamic_Call -- verify against the
  // encoding definition.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that is known not to touch the FPU: no float-stack bookkeeping.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);   // near return
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */   // indirect near jump through register
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // Pop discards the return address into EDX before the indirect jump;
  // the exception oop travels in EAX (ex_oop operand class).
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);    // purely a register-allocation artifact; emits no bytes
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast-lock with Restricted Transactional Memory support; selected only when
// the compilation uses RTM (see predicate).
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    // fast_lock is a macro-assembler routine; the trailing 'true' enables the
    // RTM path and profile_rtm drives RTM profiling data collection.
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast-lock variant.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;   // fixed size so the safepoint-poll site can be located/patched
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active rule: a load that immediately follows a store of the same value to
// the same memory slot is redundant; replace the pair with just the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.