//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
// Relocation formats used when emitting 32-bit immediates/displacements.
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
// (register pairs are laid out two OptoReg slots apart in this AD file).
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Writes {lo, hi} into the 16-byte-aligned slot at or below 'adr' and
// returns the aligned address; the caller must reserve 16 bytes of slack
// (see the extra pair in fp_signmask_pool below).
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call by the pre-call reset
// sequence (FLDCW restore and/or VZEROUPPER), so call-site offsets can be
// adjusted to account for them.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
// Offset from the start of a static Java call to the return address.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

// Offset from the start of a dynamic (inline-cache) Java call to the return
// address; the extra 5 bytes cover the MOV of the IC oop before the call.
int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

// Size in bytes of the FFree_Float_Stack_All sequence; recorded at emission
// time, -1 until it has been emitted once.
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M (or SIB) byte built from the 2-bit mod, 3-bit reg and 3-bit
// r/m fields.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a condition code into a base opcode.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Embedded oops must be real oops and must not be scavengable unless
  // ScavengeRootsInCode allows it.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits [ESP+disp] addressing (ModR/M + SIB + 8- or 32-bit displacement)
// after the given opcode byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModR/M (+SIB) addressing bytes and displacement for a
// register+memory operand; index == 0x4 means "no index register",
// base == -1 means absolute addressing.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Register-to-register integer move (MOV r32, r32); a self-move emits
// nothing.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// After a COMISS/UCOMISS, rewrite the flags for NaN operands so that the
// comparison reads as 'less than' (see bit table below).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP comparison result in 'dst':
// -1 for less-than or unordered (NaN), 0 for equal, 1 for greater-than.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog for -XX:+PrintAssembly/debug output; mirrors the
// code emitted by MachPrologNode::emit below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  // verified_entry() emits the stack bang, frame push and FPU mode setup.
  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog; mirrors the code emitted by
// MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc);  // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Safepoint poll: TEST EAX, [polling_page] with a poll_return reloc.
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

// Byte-size bound for the epilog; must be >= the bytes emitted above.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  size += 64; // added to support ReservedStackAccess
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Register classes used by the spill-copy helpers below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
// Map an OptoReg to its spill class (bad/int/x87-float/xmm/stack slot).
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (or format, or size) a single reg<->[ESP+offset] move; returns the
// accumulated encoding size in bytes.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else {                      // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Spill/fill one XMM register to/from [ESP+offset] (MOVSS/MOVSD), or print the
// listing, or just compute the encoded size. Adjacent reg_lo/reg_hi means a
// 64-bit (double) move; otherwise a 32-bit (float) move. Size accounting must
// match what MacroAssembler emits, including AVX-512 compressed-disp8 rules.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // With AVX-512 the assembler may use EVEX compressed disp8, so ask it whether
  // the displacement fits in one byte under the T1S tuple rules.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (MOVAPS/MOVAPD or MOVSS/MOVSD depending on
// UseXmmRegToRegMoveAll); adjacent lo/hi register pairs select the double form.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// GPR -> XMM copy via MOVD (32-bit only; callers assert no 64-bit int->float moves).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// XMM -> GPR copy via MOVD (32-bit only; callers assert no 64-bit float->int moves).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register MOV (opcode 0x8B + ModRM): 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 FP register to [ESP+offset]. If the value is not already at
// the top of the FP stack (FPR1L), FLD it first and use the store-and-pop
// form; otherwise store without popping. Delegates the memory operand
// encoding (and final size accounting) to impl_helper.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // st_op is only a register-number carrier for impl_helper's ModRM reg field:
  // EBX_num encodes "store & pop" (FSTP), EDX_num encodes "store, no pop" (FST).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
956 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 957 int src_hi, int dst_hi, uint ireg, outputStream* st); 958 959 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 960 int stack_offset, int reg, uint ireg, outputStream* st); 961 962 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 963 int dst_offset, uint ireg, outputStream* st) { 964 int calc_size = 0; 965 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 966 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 967 switch (ireg) { 968 case Op_VecS: 969 calc_size = 3+src_offset_size + 3+dst_offset_size; 970 break; 971 case Op_VecD: { 972 calc_size = 3+src_offset_size + 3+dst_offset_size; 973 int tmp_src_offset = src_offset + 4; 974 int tmp_dst_offset = dst_offset + 4; 975 src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4); 976 dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 
1 : 4); 977 calc_size += 3+src_offset_size + 3+dst_offset_size; 978 break; 979 } 980 case Op_VecX: 981 case Op_VecY: 982 case Op_VecZ: 983 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 984 break; 985 default: 986 ShouldNotReachHere(); 987 } 988 if (cbuf) { 989 MacroAssembler _masm(cbuf); 990 int offset = __ offset(); 991 switch (ireg) { 992 case Op_VecS: 993 __ pushl(Address(rsp, src_offset)); 994 __ popl (Address(rsp, dst_offset)); 995 break; 996 case Op_VecD: 997 __ pushl(Address(rsp, src_offset)); 998 __ popl (Address(rsp, dst_offset)); 999 __ pushl(Address(rsp, src_offset+4)); 1000 __ popl (Address(rsp, dst_offset+4)); 1001 break; 1002 case Op_VecX: 1003 __ movdqu(Address(rsp, -16), xmm0); 1004 __ movdqu(xmm0, Address(rsp, src_offset)); 1005 __ movdqu(Address(rsp, dst_offset), xmm0); 1006 __ movdqu(xmm0, Address(rsp, -16)); 1007 break; 1008 case Op_VecY: 1009 __ vmovdqu(Address(rsp, -32), xmm0); 1010 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1011 __ vmovdqu(Address(rsp, dst_offset), xmm0); 1012 __ vmovdqu(xmm0, Address(rsp, -32)); 1013 break; 1014 case Op_VecZ: 1015 __ evmovdqul(Address(rsp, -64), xmm0, 2); 1016 __ evmovdqul(xmm0, Address(rsp, src_offset), 2); 1017 __ evmovdqul(Address(rsp, dst_offset), xmm0, 2); 1018 __ evmovdqul(xmm0, Address(rsp, -64), 2); 1019 break; 1020 default: 1021 ShouldNotReachHere(); 1022 } 1023 int size = __ offset() - offset; 1024 assert(size == calc_size, "incorrect size calculation"); 1025 return size; 1026 #ifndef PRODUCT 1027 } else if (!do_size) { 1028 switch (ireg) { 1029 case Op_VecS: 1030 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1031 "popl [rsp + #%d]", 1032 src_offset, dst_offset); 1033 break; 1034 case Op_VecD: 1035 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1036 "popq [rsp + #%d]\n\t" 1037 "pushl [rsp + #%d]\n\t" 1038 "popq [rsp + #%d]", 1039 src_offset, dst_offset, src_offset+4, dst_offset+4); 1040 break; 1041 case Op_VecX: 1042 st->print("movdqu [rsp - #16], xmm0\t# 
128-bit mem-mem spill\n\t" 1043 "movdqu xmm0, [rsp + #%d]\n\t" 1044 "movdqu [rsp + #%d], xmm0\n\t" 1045 "movdqu xmm0, [rsp - #16]", 1046 src_offset, dst_offset); 1047 break; 1048 case Op_VecY: 1049 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1050 "vmovdqu xmm0, [rsp + #%d]\n\t" 1051 "vmovdqu [rsp + #%d], xmm0\n\t" 1052 "vmovdqu xmm0, [rsp - #32]", 1053 src_offset, dst_offset); 1054 break; 1055 case Op_VecZ: 1056 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1057 "vmovdqu xmm0, [rsp + #%d]\n\t" 1058 "vmovdqu [rsp + #%d], xmm0\n\t" 1059 "vmovdqu xmm0, [rsp - #64]", 1060 src_offset, dst_offset); 1061 break; 1062 default: 1063 ShouldNotReachHere(); 1064 } 1065 #endif 1066 } 1067 return calc_size; 1068 } 1069 1070 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1071 // Get registers to move 1072 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1073 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1074 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1075 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1076 1077 enum RC src_second_rc = rc_class(src_second); 1078 enum RC src_first_rc = rc_class(src_first); 1079 enum RC dst_second_rc = rc_class(dst_second); 1080 enum RC dst_first_rc = rc_class(dst_first); 1081 1082 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1083 1084 // Generate spill code! 
1085 int size = 0; 1086 1087 if( src_first == dst_first && src_second == dst_second ) 1088 return size; // Self copy, no move 1089 1090 if (bottom_type()->isa_vect() != NULL) { 1091 uint ireg = ideal_reg(); 1092 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1093 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1094 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1095 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1096 // mem -> mem 1097 int src_offset = ra_->reg2offset(src_first); 1098 int dst_offset = ra_->reg2offset(dst_first); 1099 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1100 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1101 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1102 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1103 int stack_offset = ra_->reg2offset(dst_first); 1104 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1105 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1106 int stack_offset = ra_->reg2offset(src_first); 1107 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1108 } else { 1109 ShouldNotReachHere(); 1110 } 1111 } 1112 1113 // -------------------------------------- 1114 // Check for mem-mem move. push/pop to move. 
1115 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1116 if( src_second == dst_first ) { // overlapping stack copy ranges 1117 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1118 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1119 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1120 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1121 } 1122 // move low bits 1123 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1124 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1125 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1126 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1127 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1128 } 1129 return size; 1130 } 1131 1132 // -------------------------------------- 1133 // Check for integer reg-reg copy 1134 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1135 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1136 1137 // Check for integer store 1138 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1139 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1140 1141 // Check for integer load 1142 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1143 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1144 1145 // Check for integer reg-xmm reg copy 1146 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1147 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1148 "no 64 bit integer-float reg moves" ); 1149 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1150 } 1151 // -------------------------------------- 1152 // Check for float reg-reg copy 1153 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1154 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1155 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1156 if( cbuf ) { 1157 1158 // Note the mucking with the register encode to compensate for the 0/1 1159 // indexing issue mentioned in a comment in the reg_def sections 1160 // for FPR registers many lines above here. 1161 1162 if( src_first != FPR1L_num ) { 1163 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1164 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1165 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1166 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1167 } else { 1168 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1169 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1170 } 1171 #ifndef PRODUCT 1172 } else if( !do_size ) { 1173 if( size != 0 ) st->print("\n\t"); 1174 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1175 else st->print( "FST %s", Matcher::regName[dst_first]); 1176 #endif 1177 } 1178 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1179 } 1180 1181 // Check for float store 1182 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1183 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1184 } 1185 1186 // Check for float load 1187 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1188 int offset = ra_->reg2offset(src_first); 1189 const char *op_str; 1190 int op; 1191 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1192 op_str = "FLD_D"; 1193 op = 0xDD; 1194 } else { // 32-bit load 1195 op_str = "FLD_S"; 1196 op = 0xD9; 1197 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1198 } 1199 if( cbuf ) { 1200 emit_opcode (*cbuf, op ); 1201 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1202 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1203 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1204 #ifndef PRODUCT 1205 } else if( !do_size ) { 1206 if( size != 0 ) st->print("\n\t"); 1207 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1208 #endif 1209 } 1210 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1211 return size + 3+offset_size+2; 1212 } 1213 1214 // Check for xmm reg-reg copy 1215 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1216 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1217 (src_first+1 == src_second && dst_first+1 == dst_second), 1218 "no non-adjacent float-moves" ); 1219 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1220 } 1221 1222 // Check for xmm reg-integer reg copy 1223 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1224 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1225 "no 64 bit float-integer reg moves" ); 1226 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1227 } 1228 1229 // Check for xmm store 1230 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1231 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1232 } 1233 1234 // Check for float xmm load 1235 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1236 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1237 } 1238 1239 // Copy from float reg to xmm reg 1240 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1241 
// copy to the top of stack from floating point reg 1242 // and use LEA to preserve flags 1243 if( cbuf ) { 1244 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1245 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1246 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1247 emit_d8(*cbuf,0xF8); 1248 #ifndef PRODUCT 1249 } else if( !do_size ) { 1250 if( size != 0 ) st->print("\n\t"); 1251 st->print("LEA ESP,[ESP-8]"); 1252 #endif 1253 } 1254 size += 4; 1255 1256 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1257 1258 // Copy from the temp memory to the xmm reg. 1259 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1260 1261 if( cbuf ) { 1262 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1263 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1264 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1265 emit_d8(*cbuf,0x08); 1266 #ifndef PRODUCT 1267 } else if( !do_size ) { 1268 if( size != 0 ) st->print("\n\t"); 1269 st->print("LEA ESP,[ESP+8]"); 1270 #endif 1271 } 1272 size += 4; 1273 return size; 1274 } 1275 1276 assert( size > 0, "missed a case" ); 1277 1278 // -------------------------------------------------------------------- 1279 // Check for second bits still needing moving. 
1280 if( src_second == dst_second ) 1281 return size; // Self copy; no move 1282 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1283 1284 // Check for second word int-int move 1285 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1286 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1287 1288 // Check for second word integer store 1289 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1290 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1291 1292 // Check for second word integer load 1293 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1294 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1295 1296 1297 Unimplemented(); 1298 return 0; // Mute compiler 1299 } 1300 1301 #ifndef PRODUCT 1302 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1303 implementation( NULL, ra_, false, st ); 1304 } 1305 #endif 1306 1307 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1308 implementation( &cbuf, ra_, false, NULL ); 1309 } 1310 1311 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1312 return implementation( NULL, ra_, true, NULL ); 1313 } 1314 1315 1316 //============================================================================= 1317 #ifndef PRODUCT 1318 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1319 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1320 int reg = ra_->get_reg_first(this); 1321 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1322 } 1323 #endif 1324 1325 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1326 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1327 int reg = ra_->get_encode(this); 1328 if( offset >= 128 ) { 1329 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1330 emit_rm(cbuf, 0x2, reg, 
0x04); 1331 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1332 emit_d32(cbuf, offset); 1333 } 1334 else { 1335 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1336 emit_rm(cbuf, 0x1, reg, 0x04); 1337 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1338 emit_d8(cbuf, offset); 1339 } 1340 } 1341 1342 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1343 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1344 if( offset >= 128 ) { 1345 return 7; 1346 } 1347 else { 1348 return 4; 1349 } 1350 } 1351 1352 //============================================================================= 1353 #ifndef PRODUCT 1354 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1355 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1356 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1357 st->print_cr("\tNOP"); 1358 st->print_cr("\tNOP"); 1359 if( !OptoBreakpoint ) 1360 st->print_cr("\tNOP"); 1361 } 1362 #endif 1363 1364 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1365 MacroAssembler masm(&cbuf); 1366 #ifdef ASSERT 1367 uint insts_size = cbuf.insts_size(); 1368 #endif 1369 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1370 masm.jump_cc(Assembler::notEqual, 1371 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1372 /* WARNING these NOPs are critical so that verified entry point is properly 1373 aligned for patching by NativeJump::patch_verified_entry() */ 1374 int nops_cnt = 2; 1375 if( !OptoBreakpoint ) // Leave space for int3 1376 nops_cnt += 1; 1377 masm.nop(nops_cnt); 1378 1379 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1380 } 1381 1382 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1383 return OptoBreakpoint ? 
11 : 12; 1384 } 1385 1386 1387 //============================================================================= 1388 1389 int Matcher::regnum_to_fpu_offset(int regnum) { 1390 return regnum - 32; // The FP registers are in the second chunk 1391 } 1392 1393 // This is UltraSparc specific, true just means we have fast l2f conversion 1394 const bool Matcher::convL2FSupported(void) { 1395 return true; 1396 } 1397 1398 // Is this branch offset short enough that a short branch can be used? 1399 // 1400 // NOTE: If the platform does not provide any short branch variants, then 1401 // this method should return false for offset 0. 1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1403 // The passed offset is relative to address of the branch. 1404 // On 86 a branch displacement is calculated relative to address 1405 // of a next instruction. 1406 offset -= br_size; 1407 1408 // the short version of jmpConUCF2 contains multiple branches, 1409 // making the reach slightly less 1410 if (rule == jmpConUCF2_rule) 1411 return (-126 <= offset && offset <= 125); 1412 return (-128 <= offset && offset <= 127); 1413 } 1414 1415 const bool Matcher::isSimpleConstant64(jlong value) { 1416 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1417 return false; 1418 } 1419 1420 // The ecx parameter to rep stos for the ClearArray node is in dwords. 1421 const bool Matcher::init_array_count_is_in_bytes = false; 1422 1423 // Threshold size for cleararray. 1424 const int Matcher::init_array_short_size = 8 * BytesPerLong; 1425 1426 // Needs 2 CMOV's for longs. 1427 const int Matcher::long_cmove_cost() { return 1; } 1428 1429 // No CMOVF/CMOVD with SSE/SSE2 1430 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1431 1432 // Does the CPU require late expand (see block.cpp for description of late expand)? 
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Compressed oops are a 64-bit-only concept; must not be queried on x86_32.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Rewrite the memory operand of a node that can take an implicit null check
// so that it uses a win95-safe operand variant (avoids EBP-based addressing
// that Windows 95 could not fault-handle correctly). First walks the operand
// list to find which operand covers input edge 'idx', then replaces it.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds(); // Virtual call for number of operands
  uint skipped = node->oper_input_base(); // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1; // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++; // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
         (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
// Long div/mod is not matched as divmodL on x86_32, so these are unreachable.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// True for (AndL x con) with a 32-bit constant mask, and for 32-bit ConL.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams. Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword. There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
// operand to generate a function which returns its register number when
// queried. CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
// MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried. COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding. They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode. Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block. Emit functions will live in the
  // main source block for now. In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size prefix (selects 16-bit operand size)
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // Emit a reg-reg ModRM byte (mod=11)
  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Emit an explicit opcode followed by a reg-reg ModRM byte
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32, 0 using the short 0xB8+rd immediate form
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    // normal case special case
    //
    // input : rax,: dividend min_int
    // reg: divisor -1
    //
    // output: rax,: quotient (= rax, idiv reg) min_int
    // rdx: remainder (= rax, irem reg) 0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80 cmp rax,80000000h
    // 0F 85 0B 00 00 00 jne normal_case
    // 33 D2 xor rdx,edx
    // 83 F9 FF cmp rcx,0FFh
    // 0F 84 03 00 00 00 je done
    // normal_case:
    // 99 cdq
    // F7 F9 idiv rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
    // normal_case:
    emit_opcode(cbuf,0x99); // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1719 // Check for 8-bit immediate, and set sign extend bit in opcode 1720 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1721 emit_opcode(cbuf, $primary | 0x02); } 1722 else { // If 32-bit immediate 1723 emit_opcode(cbuf, $primary); 1724 } 1725 // Emit r/m byte with secondary opcode, after primary opcode. 1726 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1727 %} 1728 1729 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1730 // Check for 8-bit immediate, and set sign extend bit in opcode 1731 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1732 $$$emit8$imm$$constant; 1733 } 1734 else { // If 32-bit immediate 1735 // Output immediate 1736 $$$emit32$imm$$constant; 1737 } 1738 %} 1739 1740 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1741 // Emit primary opcode and set sign-extend bit 1742 // Check for 8-bit immediate, and set sign extend bit in opcode 1743 int con = (int)$imm$$constant; // Throw away top bits 1744 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1745 // Emit r/m byte with secondary opcode, after primary opcode. 1746 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1747 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1748 else emit_d32(cbuf,con); 1749 %} 1750 1751 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1752 // Emit primary opcode and set sign-extend bit 1753 // Check for 8-bit immediate, and set sign extend bit in opcode 1754 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1755 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1756 // Emit r/m byte with tertiary opcode, after primary opcode. 
1757 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1758 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1759 else emit_d32(cbuf,con); 1760 %} 1761 1762 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1763 emit_cc(cbuf, $secondary, $dst$$reg ); 1764 %} 1765 1766 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1767 int destlo = $dst$$reg; 1768 int desthi = HIGH_FROM_LOW(destlo); 1769 // bswap lo 1770 emit_opcode(cbuf, 0x0F); 1771 emit_cc(cbuf, 0xC8, destlo); 1772 // bswap hi 1773 emit_opcode(cbuf, 0x0F); 1774 emit_cc(cbuf, 0xC8, desthi); 1775 // xchg lo and hi 1776 emit_opcode(cbuf, 0x87); 1777 emit_rm(cbuf, 0x3, destlo, desthi); 1778 %} 1779 1780 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1781 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1782 %} 1783 1784 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1785 $$$emit8$primary; 1786 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1787 %} 1788 1789 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1790 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1791 emit_d8(cbuf, op >> 8 ); 1792 emit_d8(cbuf, op & 255); 1793 %} 1794 1795 // emulate a CMOV with a conditional branch around a MOV 1796 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1797 // Invert sense of branch from sense of CMOV 1798 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1799 emit_d8( cbuf, $brOffs$$constant ); 1800 %} 1801 1802 enc_class enc_PartialSubtypeCheck( ) %{ 1803 Register Redi = as_Register(EDI_enc); // result register 1804 Register Reax = as_Register(EAX_enc); // super class 1805 Register Recx = as_Register(ECX_enc); // killed 1806 Register Resi = as_Register(ESI_enc); // sub class 1807 Label miss; 1808 1809 MacroAssembler _masm(&cbuf); 1810 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1811 NULL, &miss, 1812 /*set_cond_codes:*/ true); 1813 if ($primary) { 1814 __ xorptr(Redi, Redi); 1815 } 1816 __ bind(miss); 1817 %} 1818 1819 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1820 
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    // Record (or check) the emitted size so all instances encode identically.
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // Bounce the x87 float result through the stack into xmm0.
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        // Bounce the x87 double result through the stack into xmm0.
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}


  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if (ra_->C->in_24_bit_fp_mode()) {
      MacroAssembler _masm(&cbuf);
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    if (ra_->C->max_vector_size() > 16) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      MacroAssembler _masm(&cbuf);
      __ vzeroupper();
    }
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;

    if (!_method) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_IMM32);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     rspec, RELOC_DISP32);
      // Emit stubs for static call.
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);                        // Displacement

  %}

  //   Following encoding is no longer used, but may be restored if calling
  //   convention changes significantly.
  //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //     // int ic_reg     = Matcher::inline_cache_reg();
  //     // int ic_encode  = Matcher::_regEncode[ic_reg];
  //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
  //     // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
  //     // // so we load it immediately before the call
  //     // emit_opcode(cbuf, 0x8B);  // MOV    imo_reg,ic_reg  # method_oop
  //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode );  // R/M byte
  //
  //     // xor rbp,ebp
  //     emit_opcode(cbuf, 0x33);
  //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //     // CALL to interpreter.
  //     cbuf.set_insts_mark();
  //     $$$emit8$primary;
  //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
  //   %}

  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the LOW word of a long immediate; zero value uses the shorter XOR form.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the HIGH word of a long immediate; zero value uses the shorter XOR form.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // Emit the LOCK prefix only on multiprocessor systems.
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  // Pointer operand is constrained to ESI by the eSIRegP operand type.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize the ZF!=0 condition as a 0/1 boolean in 'res'.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Long shift by 1..31: SHLD/SHRD across the pair, then shift one half.
  // $tertiary selects direction (0xA4 is SHLD); r1/r2 pick the halves accordingly.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic long shift by 32..63: move hi into lo, shift, sign-fill hi.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical long shift by 32..63: move one half, shift it, clear the other.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free p += (p < q) ? y : 0 via SUB/SBB mask/AND/ADD.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
emit_opcode(cbuf,0x0F); 2348 emit_opcode(cbuf,0xA5); 2349 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg)); 2350 // SHL $dst.lo,$shift" 2351 emit_opcode(cbuf,0xD3); 2352 emit_rm(cbuf, 0x3, 0x4, $dst$$reg ); 2353 %} 2354 2355 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ 2356 // TEST shift,32 2357 emit_opcode(cbuf,0xF7); 2358 emit_rm(cbuf, 0x3, 0, ECX_enc); 2359 emit_d32(cbuf,0x20); 2360 // JEQ,s small 2361 emit_opcode(cbuf, 0x74); 2362 emit_d8(cbuf, 0x04); 2363 // MOV $dst.lo,$dst.hi 2364 emit_opcode( cbuf, 0x8B ); 2365 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2366 // CLR $dst.hi 2367 emit_opcode(cbuf, 0x33); 2368 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg)); 2369 // small: 2370 // SHRD $dst.lo,$dst.hi,$shift 2371 emit_opcode(cbuf,0x0F); 2372 emit_opcode(cbuf,0xAD); 2373 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2374 // SHR $dst.hi,$shift" 2375 emit_opcode(cbuf,0xD3); 2376 emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) ); 2377 %} 2378 2379 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ 2380 // TEST shift,32 2381 emit_opcode(cbuf,0xF7); 2382 emit_rm(cbuf, 0x3, 0, ECX_enc); 2383 emit_d32(cbuf,0x20); 2384 // JEQ,s small 2385 emit_opcode(cbuf, 0x74); 2386 emit_d8(cbuf, 0x05); 2387 // MOV $dst.lo,$dst.hi 2388 emit_opcode( cbuf, 0x8B ); 2389 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) ); 2390 // SAR $dst.hi,31 2391 emit_opcode(cbuf, 0xC1); 2392 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) ); 2393 emit_d8(cbuf, 0x1F ); 2394 // small: 2395 // SHRD $dst.lo,$dst.hi,$shift 2396 emit_opcode(cbuf,0x0F); 2397 emit_opcode(cbuf,0xAD); 2398 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg); 2399 // SAR $dst.hi,$shift" 2400 emit_opcode(cbuf,0xD3); 2401 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) ); 2402 %} 2403 2404 2405 // ----------------- Encodings for floating point unit ----------------- 2406 // May leave result in FPU-TOS or FPU reg depending on opcodes 2407 
enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV 2408 $$$emit8$primary; 2409 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 2410 %} 2411 2412 // Pop argument in FPR0 with FSTP ST(0) 2413 enc_class PopFPU() %{ 2414 emit_opcode( cbuf, 0xDD ); 2415 emit_d8( cbuf, 0xD8 ); 2416 %} 2417 2418 // !!!!! equivalent to Pop_Reg_F 2419 enc_class Pop_Reg_DPR( regDPR dst ) %{ 2420 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2421 emit_d8( cbuf, 0xD8+$dst$$reg ); 2422 %} 2423 2424 enc_class Push_Reg_DPR( regDPR dst ) %{ 2425 emit_opcode( cbuf, 0xD9 ); 2426 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2427 %} 2428 2429 enc_class strictfp_bias1( regDPR dst ) %{ 2430 emit_opcode( cbuf, 0xDB ); // FLD m80real 2431 emit_opcode( cbuf, 0x2D ); 2432 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); 2433 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2434 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2435 %} 2436 2437 enc_class strictfp_bias2( regDPR dst ) %{ 2438 emit_opcode( cbuf, 0xDB ); // FLD m80real 2439 emit_opcode( cbuf, 0x2D ); 2440 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); 2441 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2442 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2443 %} 2444 2445 // Special case for moving an integer register to a stack slot. 2446 enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2447 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp ); 2448 %} 2449 2450 // Special case for moving a register to a stack slot. 
2451 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2452 // Opcode already emitted 2453 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2454 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2455 emit_d32(cbuf, $dst$$disp); // Displacement 2456 %} 2457 2458 // Push the integer in stackSlot 'src' onto FP-stack 2459 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2460 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2461 %} 2462 2463 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2464 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2465 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2466 %} 2467 2468 // Same as Pop_Mem_F except for opcode 2469 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2470 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2471 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2472 %} 2473 2474 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2475 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2476 emit_d8( cbuf, 0xD8+$dst$$reg ); 2477 %} 2478 2479 enc_class Push_Reg_FPR( regFPR dst ) %{ 2480 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2481 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2482 %} 2483 2484 // Push FPU's float to a stack-slot, and pop FPU-stack 2485 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2486 int pop = 0x02; 2487 if ($src$$reg != FPR1L_enc) { 2488 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2489 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2490 pop = 0x03; 2491 } 2492 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2493 %} 2494 2495 // Push FPU's double to a stack-slot, and pop FPU-stack 2496 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2497 int pop = 0x02; 2498 if ($src$$reg != FPR1L_enc) { 2499 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2500 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2501 pop = 0x03; 2502 } 2503 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2504 %} 2505 2506 // Push FPU's double to 
a FPU-stack-slot, and pop FPU-stack 2507 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2508 int pop = 0xD0 - 1; // -1 since we skip FLD 2509 if ($src$$reg != FPR1L_enc) { 2510 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2511 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2512 pop = 0xD8; 2513 } 2514 emit_opcode( cbuf, 0xDD ); 2515 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2516 %} 2517 2518 2519 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2520 // load dst in FPR0 2521 emit_opcode( cbuf, 0xD9 ); 2522 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2523 if ($src$$reg != FPR1L_enc) { 2524 // fincstp 2525 emit_opcode (cbuf, 0xD9); 2526 emit_opcode (cbuf, 0xF7); 2527 // swap src with FPR1: 2528 // FXCH FPR1 with src 2529 emit_opcode(cbuf, 0xD9); 2530 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2531 // fdecstp 2532 emit_opcode (cbuf, 0xD9); 2533 emit_opcode (cbuf, 0xF6); 2534 } 2535 %} 2536 2537 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2538 MacroAssembler _masm(&cbuf); 2539 __ subptr(rsp, 8); 2540 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2541 __ fld_d(Address(rsp, 0)); 2542 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2543 __ fld_d(Address(rsp, 0)); 2544 %} 2545 2546 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2547 MacroAssembler _masm(&cbuf); 2548 __ subptr(rsp, 4); 2549 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2550 __ fld_s(Address(rsp, 0)); 2551 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2552 __ fld_s(Address(rsp, 0)); 2553 %} 2554 2555 enc_class Push_ResultD(regD dst) %{ 2556 MacroAssembler _masm(&cbuf); 2557 __ fstp_d(Address(rsp, 0)); 2558 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2559 __ addptr(rsp, 8); 2560 %} 2561 2562 enc_class Push_ResultF(regF dst, immI d8) %{ 2563 MacroAssembler _masm(&cbuf); 2564 __ fstp_s(Address(rsp, 0)); 2565 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2566 __ addptr(rsp, $d8$$constant); 2567 %} 2568 2569 enc_class Push_SrcD(regD src) %{ 2570 MacroAssembler _masm(&cbuf); 2571 __ subptr(rsp, 8); 
2572 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2573 __ fld_d(Address(rsp, 0)); 2574 %} 2575 2576 enc_class push_stack_temp_qword() %{ 2577 MacroAssembler _masm(&cbuf); 2578 __ subptr(rsp, 8); 2579 %} 2580 2581 enc_class pop_stack_temp_qword() %{ 2582 MacroAssembler _masm(&cbuf); 2583 __ addptr(rsp, 8); 2584 %} 2585 2586 enc_class push_xmm_to_fpr1(regD src) %{ 2587 MacroAssembler _masm(&cbuf); 2588 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2589 __ fld_d(Address(rsp, 0)); 2590 %} 2591 2592 enc_class Push_Result_Mod_DPR( regDPR src) %{ 2593 if ($src$$reg != FPR1L_enc) { 2594 // fincstp 2595 emit_opcode (cbuf, 0xD9); 2596 emit_opcode (cbuf, 0xF7); 2597 // FXCH FPR1 with src 2598 emit_opcode(cbuf, 0xD9); 2599 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2600 // fdecstp 2601 emit_opcode (cbuf, 0xD9); 2602 emit_opcode (cbuf, 0xF6); 2603 } 2604 // // following asm replaced with Pop_Reg_F or Pop_Mem_F 2605 // // FSTP FPR$dst$$reg 2606 // emit_opcode( cbuf, 0xDD ); 2607 // emit_d8( cbuf, 0xD8+$dst$$reg ); 2608 %} 2609 2610 enc_class fnstsw_sahf_skip_parity() %{ 2611 // fnstsw ax 2612 emit_opcode( cbuf, 0xDF ); 2613 emit_opcode( cbuf, 0xE0 ); 2614 // sahf 2615 emit_opcode( cbuf, 0x9E ); 2616 // jnp ::skip 2617 emit_opcode( cbuf, 0x7B ); 2618 emit_opcode( cbuf, 0x05 ); 2619 %} 2620 2621 enc_class emitModDPR() %{ 2622 // fprem must be iterative 2623 // :: loop 2624 // fprem 2625 emit_opcode( cbuf, 0xD9 ); 2626 emit_opcode( cbuf, 0xF8 ); 2627 // wait 2628 emit_opcode( cbuf, 0x9b ); 2629 // fnstsw ax 2630 emit_opcode( cbuf, 0xDF ); 2631 emit_opcode( cbuf, 0xE0 ); 2632 // sahf 2633 emit_opcode( cbuf, 0x9E ); 2634 // jp ::loop 2635 emit_opcode( cbuf, 0x0F ); 2636 emit_opcode( cbuf, 0x8A ); 2637 emit_opcode( cbuf, 0xF4 ); 2638 emit_opcode( cbuf, 0xFF ); 2639 emit_opcode( cbuf, 0xFF ); 2640 emit_opcode( cbuf, 0xFF ); 2641 %} 2642 2643 enc_class fpu_flags() %{ 2644 // fnstsw_ax 2645 emit_opcode( cbuf, 0xDF); 2646 emit_opcode( cbuf, 0xE0); 2647 // test ax,0x0400 2648 emit_opcode( cbuf, 
0x66 );   // operand-size prefix for 16-bit immediate
  emit_opcode( cbuf, 0xA9 );
  emit_d16  ( cbuf, 0x0400 );
  // // // This sequence works, but stalls for 12-16 cycles on PPro
  // // test rax,0x0400
  // emit_opcode( cbuf, 0xA9 );
  // emit_d32  ( cbuf, 0x00000400 );
  //
  // jz exit (no unordered comparison) -- skips the 2-byte MOV AH,1 below
  emit_opcode( cbuf, 0x74 );
  emit_d8   ( cbuf, 0x02 );
  // mov ah,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8   ( cbuf, 0x01 );
  // sahf
  emit_opcode( cbuf, 0x9E);
%}

// P6-style fixup after a comparison that may have involved a NaN: if the
// parity flag is set (unordered result) force the LT outcome, then fall
// through to a NOP so no branch targets another branch.
enc_class cmpF_P6_fixup() %{
  // Fixup the integer flags in case comparison involved a NaN
  //
  // JNP exit (no unordered comparison, P-flag is set by NaN)
  // -- displacement 3 = MOV AH,1 (2 bytes) + SAHF (1 byte)
  emit_opcode( cbuf, 0x7B );
  emit_d8   ( cbuf, 0x03 );
  // MOV AH,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8   ( cbuf, 0x01 );
  // SAHF
  emit_opcode( cbuf, 0x9E);
  // NOP // target for branch to avoid branch to branch
  emit_opcode( cbuf, 0x90);
%}

// fnstsw_ax();
// sahf();
// movl(dst, nan_result);
// jcc(Assembler::parity, exit);
// movl(dst, less_result);
// jcc(Assembler::below, exit);
// movl(dst, equal_result);
// jcc(Assembler::equal, exit);
// movl(dst, greater_result);

// less_result     =  1;
// greater_result  = -1;
// equal_result    =  0;
// nan_result      = -1;

// Materialize the three-way float-compare result (-1/0/1, NaN -> -1)
// into an integer register from the FPU status flags.  Each MOV r32,imm32
// is 5 bytes and each short Jcc is 2, so the skip distances are
// 0x13 = 19 = (5+2)+(5+2)+5, 0x0C = 12 = 5+2+5, 0x05 = 5.
enc_class CmpF_Result(rRegI dst) %{
  // fnstsw_ax();
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // sahf
  emit_opcode( cbuf, 0x9E);
  // movl(dst, nan_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::parity, exit);
  emit_opcode( cbuf, 0x7A );
  emit_d8   ( cbuf, 0x13 );
  // movl(dst, less_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::below, exit);
  emit_opcode( cbuf, 0x72 );
  emit_d8   ( cbuf, 0x0C );
  // movl(dst, equal_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, 0 );
  // jcc(Assembler::equal, exit);
  emit_opcode( cbuf, 0x74 );
  emit_d8   ( cbuf, 0x05 );
  // movl(dst, greater_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, 1 );
%}


// Compare the longs and set flags
// BROKEN! Do Not use as-is
enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
  // CMP    $src1.hi,$src2.hi
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  // JNE,s  done
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 2 );
  // CMP    $src1.lo,$src2.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
// done:
%}

// Sign-extend an int into a long register pair: copy into both halves,
// then arithmetic-shift the high half right by 31.
enc_class convert_int_long( regL dst, rRegI src ) %{
  // mov $dst.lo,$src
  int dst_encoding = $dst$$reg;
  int src_encoding = $src$$reg;
  encode_Copy( cbuf, dst_encoding , src_encoding );
  // mov $dst.hi,$src
  encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
  // sar $dst.hi,31
  emit_opcode( cbuf, 0xC1 );
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
  emit_d8(cbuf, 0x1F );
%}

// Push a long onto the stack and FILD it (long -> double), then pop the
// two pushed words back off the CPU stack.
enc_class convert_long_double( eRegL src ) %{
  // push $src.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
  // push $src.lo
  emit_opcode(cbuf, 0x50+$src$$reg );
  // fild 64-bits at [SP]
  emit_opcode(cbuf,0xdf);
  emit_d8(cbuf, 0x6C);
  emit_d8(cbuf, 0x24);
  emit_d8(cbuf, 0x00);
  // pop stack
  emit_opcode(cbuf, 0x83); // add SP, #8
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 0x8);
%}

// 32x32->64 multiply followed by an arithmetic shift of the high half;
// used when only bits [32..63] of the product are wanted.  No SAR is
// emitted when $cnt == 32 (shift_count == 0).
enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
  // IMUL EDX:EAX,$src1
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
  // SAR EDX,$cnt-32
  int shift_count = ((int)$cnt$$constant) - 32;
  if (shift_count > 0) {
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, $dst$$reg );
    emit_d8(cbuf, shift_count);
  }
%}

// this version doesn't have add sp, 8
// (caller is responsible for releasing the two pushed words)
enc_class convert_long_double2( eRegL src ) %{
  // push $src.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
  // push $src.lo
  emit_opcode(cbuf, 0x50+$src$$reg );
  // fild 64-bits at [SP]
  emit_opcode(cbuf,0xdf);
  emit_d8(cbuf, 0x6C);
  emit_d8(cbuf, 0x24);
  emit_d8(cbuf, 0x00);
%}

enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic idea: long = (long)int * (long)int
  // IMUL EDX:EAX, src
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x5, $src$$reg);
%}

enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  // MUL EDX:EAX, src
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, $src$$reg);
%}

// Full 64x64 multiply on a 32-bit machine.  $dst is the EDX:EAX pair;
// the cross terms are accumulated in $tmp and added into the high half.
enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  // MOV    $tmp,$src.lo
  encode_Copy( cbuf, $tmp$$reg, $src$$reg );
  // IMUL   $tmp,EDX
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0xAF );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  // MOV    EDX,$src.hi
  encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
  // IMUL   EDX,EAX
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0xAF );
  emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // ADD    $tmp,EDX
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  // MUL    EDX:EAX,$src.lo
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, $src$$reg );
  // ADD    EDX,$tmp  (original comment said ESI; the encoding uses $tmp)
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
%}

// 64-bit multiply by a small (8-bit) constant; same accumulation scheme
// as long_multiply with the constant folded in.
enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
  // Basic idea: lo(result) = lo(src * y_lo)
  //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
  // IMUL   $tmp,EDX,$src
  emit_opcode( cbuf, 0x6B );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  emit_d8( cbuf, (int)$src$$constant );
  // MOV    EDX,$src
  emit_opcode(cbuf, 0xB8 + EDX_enc);
  emit_d32( cbuf, (int)$src$$constant );
  // MUL    EDX:EAX,EDX
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, EDX_enc );
  // ADD    EDX,$tmp  (original comment said ESI; the encoding uses $tmp)
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
%}

// Long division: push both operands (4 words) and call the
// SharedRuntime::ldiv helper, then pop the arguments.
enc_class long_div( eRegL src1, eRegL src2 ) %{
  // PUSH src1.hi
  emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
  // PUSH src1.lo
  emit_opcode(cbuf, 0x50+$src1$$reg );
  // PUSH src2.hi
  emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
  // PUSH src2.lo
  emit_opcode(cbuf, 0x50+$src2$$reg );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8); // Call into runtime
  emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Restore stack
  emit_opcode(cbuf, 0x83); // add SP, #framesize
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 4*4);
%}

// Long remainder: identical shape to long_div but calls
// SharedRuntime::lrem.
enc_class long_mod( eRegL src1, eRegL src2 ) %{
  // PUSH src1.hi
  emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
  // PUSH src1.lo
  emit_opcode(cbuf, 0x50+$src1$$reg );
  // PUSH src2.hi
  emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
  // PUSH src2.lo
  emit_opcode(cbuf, 0x50+$src2$$reg );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8); // Call into runtime
  emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Restore stack
  emit_opcode(cbuf, 0x83); // add SP, #framesize
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 4*4);
%}

// Set ZF iff the long is zero: OR the two halves together in $tmp.
enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
  // MOV   $tmp,$src.lo
  emit_opcode(cbuf, 0x8B);
  emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
  // OR    $tmp,$src.hi
  emit_opcode(cbuf, 0x0B);
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
%}

// Long equality compare: compare low halves, short-circuit past the
// high-half compare when they already differ.
enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
  // CMP    $src1.lo,$src2.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
  // JNE,s  skip
  emit_cc(cbuf, 0x70, 0x5);
  emit_d8(cbuf,2);
  // CMP    $src1.hi,$src2.hi
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
%}

// Signed long compare via CMP low / SBB high; clobbers $tmp.
enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
  // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
  // MOV    $tmp,$src1.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
  // SBB    $tmp,$src2.hi\t! Compute flags for long compare
  emit_opcode( cbuf, 0x1B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
%}

// Signed long compare against zero: 0 - src via CMP/SBB from a zeroed
// $tmp.
enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
  // XOR    $tmp,$tmp
  emit_opcode(cbuf,0x33); // XOR
  emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
  // CMP    $tmp,$src.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
  // SBB    $tmp,$src.hi
  emit_opcode( cbuf, 0x1B );
  emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
%}

// Sniff, sniff...
smells like Gnu Superoptimizer
// Negate a 64-bit value in place: NEG hi; NEG lo; SBB hi,0 propagates the
// borrow from the low half.
enc_class neg_long( eRegL dst ) %{
  emit_opcode(cbuf,0xF7);    // NEG hi
  emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
  emit_opcode(cbuf,0xF7);    // NEG lo
  emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
  emit_opcode(cbuf,0x83);    // SBB hi,0
  emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
  emit_d8    (cbuf,0 );
%}

// POP EDX (single-byte opcode 0x5A).
enc_class enc_pop_rdx() %{
  emit_opcode(cbuf,0x5A);
%}

// Tail-jump to the rethrow stub (relocated 32-bit relative JMP).
enc_class enc_rethrow() %{
  cbuf.set_insts_mark();
  emit_opcode(cbuf, 0xE9);  // jmp    entry
  emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                 runtime_call_Relocation::spec(), RELOC_IMM32 );
%}


// Convert a double to an int.  Java semantics require we do complex
// manglelations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware throws an exception which
// patches up the correct value directly to the stack.
enc_class DPR2I_encoding( regDPR src ) %{
  // Flip to round-to-zero mode.  We attempted to allow invalid-op
  // exceptions here, so that a NAN or other corner-case value will
  // thrown an exception (but normal values get converted at full speed).
  // However, I2C adapters and other float-stack manglers leave pending
  // invalid-op exceptions hanging.  We would have to clear them before
  // enabling them and that is more expensive than just testing for the
  // invalid value Intel stores down in the corner cases.
  emit_opcode(cbuf,0xD9);            // FLDCW  trunc
  emit_opcode(cbuf,0x2D);
  emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
  // Allocate a word
  emit_opcode(cbuf,0x83);            // SUB ESP,4
  emit_opcode(cbuf,0xEC);
  emit_d8(cbuf,0x04);
  // Encoding assumes a double has been pushed into FPR0.
  // Store down the double as an int, popping the FPU stack
  emit_opcode(cbuf,0xDB);            // FISTP [ESP]
  emit_opcode(cbuf,0x1C);
  emit_d8(cbuf,0x24);
  // Restore the rounding mode; mask the exception
  emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
  emit_opcode(cbuf,0x2D);
  emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

  // Load the converted int; adjust CPU stack.  0x80000000 is the x87
  // "integer indefinite" value stored for NaN/overflow corner cases --
  // only then do we take the slow call.
  emit_opcode(cbuf,0x58);       // POP EAX
  emit_opcode(cbuf,0x3D);       // CMP EAX,imm
  emit_d32   (cbuf,0x80000000); //         0x80000000
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07);       // Size of slow_call (FLD 2 bytes + CALL 5 bytes)
  // Push src onto stack slow-path
  emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
  emit_d8    (cbuf,0xC0-1+$src$$reg );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Carry on here...
%}

// Same scheme as DPR2I_encoding but for double -> long: a 64-bit FISTP,
// with the corner-case test being EDX==0x80000000 && EAX==0 (the 64-bit
// integer-indefinite pattern).
enc_class DPR2L_encoding( regDPR src ) %{
  emit_opcode(cbuf,0xD9);            // FLDCW  trunc
  emit_opcode(cbuf,0x2D);
  emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
  // Allocate a word
  emit_opcode(cbuf,0x83);            // SUB ESP,8
  emit_opcode(cbuf,0xEC);
  emit_d8(cbuf,0x08);
  // Encoding assumes a double has been pushed into FPR0.
  // Store down the double as a long, popping the FPU stack
  emit_opcode(cbuf,0xDF);            // FISTP [ESP]
  emit_opcode(cbuf,0x3C);
  emit_d8(cbuf,0x24);
  // Restore the rounding mode; mask the exception
  emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
  emit_opcode(cbuf,0x2D);
  emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

  // Load the converted int; adjust CPU stack
  emit_opcode(cbuf,0x58);       // POP EAX
  emit_opcode(cbuf,0x5A);       // POP EDX
  emit_opcode(cbuf,0x81);       // CMP EDX,imm  (ModRM 0xFA = /7 rdx)
  emit_d8    (cbuf,0xFA);       // rdx
  emit_d32   (cbuf,0x80000000); //         0x80000000
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07+4);     // Size of slow_call (+4 for TEST/JNE below)
  emit_opcode(cbuf,0x85);       // TEST EAX,EAX
  emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07);       // Size of slow_call
  // Push src onto stack slow-path
  emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
  emit_d8    (cbuf,0xC0-1+$src$$reg );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Carry on here...
%}

// FMUL ST,ST(i): multiply stack top by FPU register $src1.
enc_class FMul_ST_reg( eRegFPR src1 ) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FMUL   ST,$src  /* D8 C8+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC8 + $src1$$reg);
%}

// FADD ST,ST(i): add FPU register $src2 into the stack top.
// NOTE(review): the comment below says FADDP but opcode D8 C0+i is the
// non-popping FADD; the popping form is FAddP_reg_ST.
enc_class FAdd_ST_reg( eRegFPR src2 ) %{
  // FADD   ST,src2  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src2$$reg);
  //could use FADDP  src2,fpST  /* DE C0+i */
%}

// FADDP ST(i),ST: add stack top into $src2 and pop.
enc_class FAddP_reg_ST( eRegFPR src2 ) %{
  // FADDP  src2,ST  /* DE C0+i */
  emit_opcode(cbuf, 0xDE);
  emit_opcode(cbuf, 0xC0 + $src2$$reg);
%}

// FSUB then FDIV against the value already on the stack top.
enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
  // Operand has been loaded into fp ST (stack top)
  // FSUB   ST,$src1
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xE0 + $src1$$reg);

  // FDIV
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xF0 + $src2$$reg);
%}

// (mem + src1) * src2, result left on the stack top.
enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FADD   ST,$src  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src1$$reg);

  // FMUL  ST,src2  /* D8 C*+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC8 + $src2$$reg);
%}


// Same as MulFAddF but the multiply pops into $src2 (FMULP).
enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
  // Operand was loaded from memory into fp ST (stack top)
  // FADD   ST,$src  /* D8 C0+i */
  emit_opcode(cbuf, 0xD8);
  emit_opcode(cbuf, 0xC0 + $src1$$reg);

  // FMULP  src2,ST  /* DE C8+i */
  emit_opcode(cbuf, 0xDE);
  emit_opcode(cbuf, 0xC8 + $src2$$reg);
%}

// Atomically load the volatile long: a 64-bit FILD from memory (opcode
// 0xDF /5) is atomic on x86, then the value is stored to a stack slot.
enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
  emit_opcode(cbuf,0xDF);
  int rm_byte_opcode = 0x05;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
  store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
  cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
  emit_opcode(cbuf,0xDF);
  int rm_byte_opcode = 0x07;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

// Safepoint Poll.  This polls the safepoint page, and causes an
// exception if it is not readable.  Unfortunately, it kills the condition code
// in the process.
// We currently use TESTL [spp],EDI
// A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
enc_class Safepoint_Poll() %{
  cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
  emit_opcode(cbuf,0x85);
  emit_rm (cbuf, 0x0, 0x7, 0x5);    // mod=00 reg=EDI rm=101 (disp32)
  emit_d32(cbuf, (intptr_t)os::get_polling_page());
%}
%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
// S T A C K   L A Y O U T    Allocators stack-slot number
//                            | (to get allocators register number
// G  Owned by    |        |  v    add OptoReg::stack0())
// r   CALLER     |        |
// o     |        +--------+      pad to even-align allocators stack-slot
// w     V        |  pad0  |        numbers; owned by CALLER
// t -----------+--------+----> Matcher::_in_arg_limit, unaligned
// h     ^        |   in   |  5
//       |        |  args  |  4   Holes in incoming args owned by SELF
//       |        |        |  3
//       |        +--------+
//       V        | old out|      Empty on Intel, window on Sparc
//       |    old |preserve|      Must be even aligned.
//       |     SP-+--------+----> Matcher::_old_SP, even aligned
//       |        |   in   |  3   area for Intel ret address
// Owned by       |preserve|      Empty on Sparc.
//  SELF          +--------+
//       |        |  pad2  |  2   pad to align old SP
//       |        +--------+  1
//       |        | locks  |  0
//       |        +--------+----> OptoReg::stack0(), even aligned
//       |        |  pad1  | 11   pad to align new SP
//       |        +--------+
//       |        |        | 10
//       |        | spills |  9   spills
//       V        |        |  8   (pad0 slot for callee)
//     -----------+--------+----> Matcher::_out_arg_limit, unaligned
//       ^        |  out   |  7
//       |        |  args  |  6   Holes in outgoing args owned by CALLEE
// Owned by       +--------+
//  CALLEE        | new out|  6   Empty on Intel, window on Sparc
//       |    new |preserve|      Must be even-aligned.
//       |     SP-+--------+----> Matcher::_new_SP, even aligned
//       |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  // The lo/hi tables are indexed by ideal register type (Op_RegI..Op_RegL):
  // ints/pointers in EAX, floats/doubles on the FPU stack (FPR1L/FPR1H
  // pair), longs in EDX:EAX.
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values (Java calling convention).
  // NOTE: unlike c_return_value above, floats use XMM0 whenever UseSSE>=1
  // (doubles still require UseSSE>=2) -- the thresholds intentionally
  // differ between the two conventions.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);           // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate that fits in a signed byte (imm8 encodings)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate that fits in a signed 16-bit field
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift counts 1..31 (shifts that stay within one 32-bit half)
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift counts 32..63 (shifts that cross into the high half)
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate that fits in a signed 32-bit value
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 form, used when SSE2 is unavailable)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 form)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 form, used when SSE is unavailable)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE form)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0 (bit pattern must be all zero)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register (registers with byte-addressable subregs)
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand
eCXRegI(xRegI reg) %{ 3727 constraint(ALLOC_IN_RC(ecx_reg)); 3728 match(reg); 3729 match(rRegI); 3730 3731 format %{ "ECX" %} 3732 interface(REG_INTER); 3733 %} 3734 3735 operand eDXRegI(xRegI reg) %{ 3736 constraint(ALLOC_IN_RC(edx_reg)); 3737 match(reg); 3738 match(rRegI); 3739 3740 format %{ "EDX" %} 3741 interface(REG_INTER); 3742 %} 3743 3744 operand eDIRegI(xRegI reg) %{ 3745 constraint(ALLOC_IN_RC(edi_reg)); 3746 match(reg); 3747 match(rRegI); 3748 3749 format %{ "EDI" %} 3750 interface(REG_INTER); 3751 %} 3752 3753 operand naxRegI() %{ 3754 constraint(ALLOC_IN_RC(nax_reg)); 3755 match(RegI); 3756 match(eCXRegI); 3757 match(eDXRegI); 3758 match(eSIRegI); 3759 match(eDIRegI); 3760 3761 format %{ %} 3762 interface(REG_INTER); 3763 %} 3764 3765 operand nadxRegI() %{ 3766 constraint(ALLOC_IN_RC(nadx_reg)); 3767 match(RegI); 3768 match(eBXRegI); 3769 match(eCXRegI); 3770 match(eSIRegI); 3771 match(eDIRegI); 3772 3773 format %{ %} 3774 interface(REG_INTER); 3775 %} 3776 3777 operand ncxRegI() %{ 3778 constraint(ALLOC_IN_RC(ncx_reg)); 3779 match(RegI); 3780 match(eAXRegI); 3781 match(eDXRegI); 3782 match(eSIRegI); 3783 match(eDIRegI); 3784 3785 format %{ %} 3786 interface(REG_INTER); 3787 %} 3788 3789 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3790 // // 3791 operand eSIRegI(xRegI reg) %{ 3792 constraint(ALLOC_IN_RC(esi_reg)); 3793 match(reg); 3794 match(rRegI); 3795 3796 format %{ "ESI" %} 3797 interface(REG_INTER); 3798 %} 3799 3800 // Pointer Register 3801 operand anyRegP() %{ 3802 constraint(ALLOC_IN_RC(any_reg)); 3803 match(RegP); 3804 match(eAXRegP); 3805 match(eBXRegP); 3806 match(eCXRegP); 3807 match(eDIRegP); 3808 match(eRegP); 3809 3810 format %{ %} 3811 interface(REG_INTER); 3812 %} 3813 3814 operand eRegP() %{ 3815 constraint(ALLOC_IN_RC(int_reg)); 3816 match(RegP); 3817 match(eAXRegP); 3818 match(eBXRegP); 3819 match(eCXRegP); 3820 match(eDIRegP); 3821 3822 format %{ %} 3823 interface(REG_INTER); 3824 %} 3825 3826 // 
On windows95, EBP is not safe to use for implicit null tests. 3827 operand eRegP_no_EBP() %{ 3828 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3829 match(RegP); 3830 match(eAXRegP); 3831 match(eBXRegP); 3832 match(eCXRegP); 3833 match(eDIRegP); 3834 3835 op_cost(100); 3836 format %{ %} 3837 interface(REG_INTER); 3838 %} 3839 3840 operand naxRegP() %{ 3841 constraint(ALLOC_IN_RC(nax_reg)); 3842 match(RegP); 3843 match(eBXRegP); 3844 match(eDXRegP); 3845 match(eCXRegP); 3846 match(eSIRegP); 3847 match(eDIRegP); 3848 3849 format %{ %} 3850 interface(REG_INTER); 3851 %} 3852 3853 operand nabxRegP() %{ 3854 constraint(ALLOC_IN_RC(nabx_reg)); 3855 match(RegP); 3856 match(eCXRegP); 3857 match(eDXRegP); 3858 match(eSIRegP); 3859 match(eDIRegP); 3860 3861 format %{ %} 3862 interface(REG_INTER); 3863 %} 3864 3865 operand pRegP() %{ 3866 constraint(ALLOC_IN_RC(p_reg)); 3867 match(RegP); 3868 match(eBXRegP); 3869 match(eDXRegP); 3870 match(eSIRegP); 3871 match(eDIRegP); 3872 3873 format %{ %} 3874 interface(REG_INTER); 3875 %} 3876 3877 // Special Registers 3878 // Return a pointer value 3879 operand eAXRegP(eRegP reg) %{ 3880 constraint(ALLOC_IN_RC(eax_reg)); 3881 match(reg); 3882 format %{ "EAX" %} 3883 interface(REG_INTER); 3884 %} 3885 3886 // Used in AtomicAdd 3887 operand eBXRegP(eRegP reg) %{ 3888 constraint(ALLOC_IN_RC(ebx_reg)); 3889 match(reg); 3890 format %{ "EBX" %} 3891 interface(REG_INTER); 3892 %} 3893 3894 // Tail-call (interprocedural jump) to interpreter 3895 operand eCXRegP(eRegP reg) %{ 3896 constraint(ALLOC_IN_RC(ecx_reg)); 3897 match(reg); 3898 format %{ "ECX" %} 3899 interface(REG_INTER); 3900 %} 3901 3902 operand eSIRegP(eRegP reg) %{ 3903 constraint(ALLOC_IN_RC(esi_reg)); 3904 match(reg); 3905 format %{ "ESI" %} 3906 interface(REG_INTER); 3907 %} 3908 3909 // Used in rep stosw 3910 operand eDIRegP(eRegP reg) %{ 3911 constraint(ALLOC_IN_RC(edi_reg)); 3912 match(reg); 3913 format %{ "EDI" %} 3914 interface(REG_INTER); 3915 %} 3916 3917 operand eRegL() %{ 
3918 constraint(ALLOC_IN_RC(long_reg)); 3919 match(RegL); 3920 match(eADXRegL); 3921 3922 format %{ %} 3923 interface(REG_INTER); 3924 %} 3925 3926 operand eADXRegL( eRegL reg ) %{ 3927 constraint(ALLOC_IN_RC(eadx_reg)); 3928 match(reg); 3929 3930 format %{ "EDX:EAX" %} 3931 interface(REG_INTER); 3932 %} 3933 3934 operand eBCXRegL( eRegL reg ) %{ 3935 constraint(ALLOC_IN_RC(ebcx_reg)); 3936 match(reg); 3937 3938 format %{ "EBX:ECX" %} 3939 interface(REG_INTER); 3940 %} 3941 3942 // Special case for integer high multiply 3943 operand eADXRegL_low_only() %{ 3944 constraint(ALLOC_IN_RC(eadx_reg)); 3945 match(RegL); 3946 3947 format %{ "EAX" %} 3948 interface(REG_INTER); 3949 %} 3950 3951 // Flags register, used as output of compare instructions 3952 operand eFlagsReg() %{ 3953 constraint(ALLOC_IN_RC(int_flags)); 3954 match(RegFlags); 3955 3956 format %{ "EFLAGS" %} 3957 interface(REG_INTER); 3958 %} 3959 3960 // Flags register, used as output of FLOATING POINT compare instructions 3961 operand eFlagsRegU() %{ 3962 constraint(ALLOC_IN_RC(int_flags)); 3963 match(RegFlags); 3964 3965 format %{ "EFLAGS_U" %} 3966 interface(REG_INTER); 3967 %} 3968 3969 operand eFlagsRegUCF() %{ 3970 constraint(ALLOC_IN_RC(int_flags)); 3971 match(RegFlags); 3972 predicate(false); 3973 3974 format %{ "EFLAGS_U_CF" %} 3975 interface(REG_INTER); 3976 %} 3977 3978 // Condition Code Register used by long compare 3979 operand flagsReg_long_LTGE() %{ 3980 constraint(ALLOC_IN_RC(int_flags)); 3981 match(RegFlags); 3982 format %{ "FLAGS_LTGE" %} 3983 interface(REG_INTER); 3984 %} 3985 operand flagsReg_long_EQNE() %{ 3986 constraint(ALLOC_IN_RC(int_flags)); 3987 match(RegFlags); 3988 format %{ "FLAGS_EQNE" %} 3989 interface(REG_INTER); 3990 %} 3991 operand flagsReg_long_LEGT() %{ 3992 constraint(ALLOC_IN_RC(int_flags)); 3993 match(RegFlags); 3994 format %{ "FLAGS_LEGT" %} 3995 interface(REG_INTER); 3996 %} 3997 3998 // Float register operands 3999 operand regDPR() %{ 4000 predicate( UseSSE < 2 ); 
4001 constraint(ALLOC_IN_RC(fp_dbl_reg)); 4002 match(RegD); 4003 match(regDPR1); 4004 match(regDPR2); 4005 format %{ %} 4006 interface(REG_INTER); 4007 %} 4008 4009 operand regDPR1(regDPR reg) %{ 4010 predicate( UseSSE < 2 ); 4011 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4012 match(reg); 4013 format %{ "FPR1" %} 4014 interface(REG_INTER); 4015 %} 4016 4017 operand regDPR2(regDPR reg) %{ 4018 predicate( UseSSE < 2 ); 4019 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4020 match(reg); 4021 format %{ "FPR2" %} 4022 interface(REG_INTER); 4023 %} 4024 4025 operand regnotDPR1(regDPR reg) %{ 4026 predicate( UseSSE < 2 ); 4027 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4028 match(reg); 4029 format %{ %} 4030 interface(REG_INTER); 4031 %} 4032 4033 // Float register operands 4034 operand regFPR() %{ 4035 predicate( UseSSE < 2 ); 4036 constraint(ALLOC_IN_RC(fp_flt_reg)); 4037 match(RegF); 4038 match(regFPR1); 4039 format %{ %} 4040 interface(REG_INTER); 4041 %} 4042 4043 // Float register operands 4044 operand regFPR1(regFPR reg) %{ 4045 predicate( UseSSE < 2 ); 4046 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4047 match(reg); 4048 format %{ "FPR1" %} 4049 interface(REG_INTER); 4050 %} 4051 4052 // XMM Float register operands 4053 operand regF() %{ 4054 predicate( UseSSE>=1 ); 4055 constraint(ALLOC_IN_RC(float_reg_legacy)); 4056 match(RegF); 4057 format %{ %} 4058 interface(REG_INTER); 4059 %} 4060 4061 // XMM Double register operands 4062 operand regD() %{ 4063 predicate( UseSSE>=2 ); 4064 constraint(ALLOC_IN_RC(double_reg_legacy)); 4065 match(RegD); 4066 format %{ %} 4067 interface(REG_INTER); 4068 %} 4069 4070 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM) 4071 // runtime code generation via reg_class_dynamic. 
4072 operand vecS() %{ 4073 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 4074 match(VecS); 4075 4076 format %{ %} 4077 interface(REG_INTER); 4078 %} 4079 4080 operand vecD() %{ 4081 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 4082 match(VecD); 4083 4084 format %{ %} 4085 interface(REG_INTER); 4086 %} 4087 4088 operand vecX() %{ 4089 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 4090 match(VecX); 4091 4092 format %{ %} 4093 interface(REG_INTER); 4094 %} 4095 4096 operand vecY() %{ 4097 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 4098 match(VecY); 4099 4100 format %{ %} 4101 interface(REG_INTER); 4102 %} 4103 4104 //----------Memory Operands---------------------------------------------------- 4105 // Direct Memory Operand 4106 operand direct(immP addr) %{ 4107 match(addr); 4108 4109 format %{ "[$addr]" %} 4110 interface(MEMORY_INTER) %{ 4111 base(0xFFFFFFFF); 4112 index(0x4); 4113 scale(0x0); 4114 disp($addr); 4115 %} 4116 %} 4117 4118 // Indirect Memory Operand 4119 operand indirect(eRegP reg) %{ 4120 constraint(ALLOC_IN_RC(int_reg)); 4121 match(reg); 4122 4123 format %{ "[$reg]" %} 4124 interface(MEMORY_INTER) %{ 4125 base($reg); 4126 index(0x4); 4127 scale(0x0); 4128 disp(0x0); 4129 %} 4130 %} 4131 4132 // Indirect Memory Plus Short Offset Operand 4133 operand indOffset8(eRegP reg, immI8 off) %{ 4134 match(AddP reg off); 4135 4136 format %{ "[$reg + $off]" %} 4137 interface(MEMORY_INTER) %{ 4138 base($reg); 4139 index(0x4); 4140 scale(0x0); 4141 disp($off); 4142 %} 4143 %} 4144 4145 // Indirect Memory Plus Long Offset Operand 4146 operand indOffset32(eRegP reg, immI off) %{ 4147 match(AddP reg off); 4148 4149 format %{ "[$reg + $off]" %} 4150 interface(MEMORY_INTER) %{ 4151 base($reg); 4152 index(0x4); 4153 scale(0x0); 4154 disp($off); 4155 %} 4156 %} 4157 4158 // Indirect Memory Plus Long Offset Operand 4159 operand indOffset32X(rRegI reg, immP off) %{ 4160 match(AddP off reg); 4161 4162 format %{ "[$reg + $off]" %} 4163 interface(MEMORY_INTER) %{ 4164 
base($reg); 4165 index(0x4); 4166 scale(0x0); 4167 disp($off); 4168 %} 4169 %} 4170 4171 // Indirect Memory Plus Index Register Plus Offset Operand 4172 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4173 match(AddP (AddP reg ireg) off); 4174 4175 op_cost(10); 4176 format %{"[$reg + $off + $ireg]" %} 4177 interface(MEMORY_INTER) %{ 4178 base($reg); 4179 index($ireg); 4180 scale(0x0); 4181 disp($off); 4182 %} 4183 %} 4184 4185 // Indirect Memory Plus Index Register Plus Offset Operand 4186 operand indIndex(eRegP reg, rRegI ireg) %{ 4187 match(AddP reg ireg); 4188 4189 op_cost(10); 4190 format %{"[$reg + $ireg]" %} 4191 interface(MEMORY_INTER) %{ 4192 base($reg); 4193 index($ireg); 4194 scale(0x0); 4195 disp(0x0); 4196 %} 4197 %} 4198 4199 // // ------------------------------------------------------------------------- 4200 // // 486 architecture doesn't support "scale * index + offset" with out a base 4201 // // ------------------------------------------------------------------------- 4202 // // Scaled Memory Operands 4203 // // Indirect Memory Times Scale Plus Offset Operand 4204 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4205 // match(AddP off (LShiftI ireg scale)); 4206 // 4207 // op_cost(10); 4208 // format %{"[$off + $ireg << $scale]" %} 4209 // interface(MEMORY_INTER) %{ 4210 // base(0x4); 4211 // index($ireg); 4212 // scale($scale); 4213 // disp($off); 4214 // %} 4215 // %} 4216 4217 // Indirect Memory Times Scale Plus Index Register 4218 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4219 match(AddP reg (LShiftI ireg scale)); 4220 4221 op_cost(10); 4222 format %{"[$reg + $ireg << $scale]" %} 4223 interface(MEMORY_INTER) %{ 4224 base($reg); 4225 index($ireg); 4226 scale($scale); 4227 disp(0x0); 4228 %} 4229 %} 4230 4231 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4232 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4233 match(AddP (AddP reg (LShiftI ireg 
scale)) off); 4234 4235 op_cost(10); 4236 format %{"[$reg + $off + $ireg << $scale]" %} 4237 interface(MEMORY_INTER) %{ 4238 base($reg); 4239 index($ireg); 4240 scale($scale); 4241 disp($off); 4242 %} 4243 %} 4244 4245 //----------Load Long Memory Operands------------------------------------------ 4246 // The load-long idiom will use it's address expression again after loading 4247 // the first word of the long. If the load-long destination overlaps with 4248 // registers used in the addressing expression, the 2nd half will be loaded 4249 // from a clobbered address. Fix this by requiring that load-long use 4250 // address registers that do not overlap with the load-long target. 4251 4252 // load-long support 4253 operand load_long_RegP() %{ 4254 constraint(ALLOC_IN_RC(esi_reg)); 4255 match(RegP); 4256 match(eSIRegP); 4257 op_cost(100); 4258 format %{ %} 4259 interface(REG_INTER); 4260 %} 4261 4262 // Indirect Memory Operand Long 4263 operand load_long_indirect(load_long_RegP reg) %{ 4264 constraint(ALLOC_IN_RC(esi_reg)); 4265 match(reg); 4266 4267 format %{ "[$reg]" %} 4268 interface(MEMORY_INTER) %{ 4269 base($reg); 4270 index(0x4); 4271 scale(0x0); 4272 disp(0x0); 4273 %} 4274 %} 4275 4276 // Indirect Memory Plus Long Offset Operand 4277 operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4278 match(AddP reg off); 4279 4280 format %{ "[$reg + $off]" %} 4281 interface(MEMORY_INTER) %{ 4282 base($reg); 4283 index(0x4); 4284 scale(0x0); 4285 disp($off); 4286 %} 4287 %} 4288 4289 opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4290 4291 4292 //----------Special Memory Operands-------------------------------------------- 4293 // Stack Slot Operand - This operand is used for loading and storing temporary 4294 // values on the stack where a match requires a value to 4295 // flow through memory. 
4296 operand stackSlotP(sRegP reg) %{ 4297 constraint(ALLOC_IN_RC(stack_slots)); 4298 // No match rule because this operand is only generated in matching 4299 format %{ "[$reg]" %} 4300 interface(MEMORY_INTER) %{ 4301 base(0x4); // ESP 4302 index(0x4); // No Index 4303 scale(0x0); // No Scale 4304 disp($reg); // Stack Offset 4305 %} 4306 %} 4307 4308 operand stackSlotI(sRegI reg) %{ 4309 constraint(ALLOC_IN_RC(stack_slots)); 4310 // No match rule because this operand is only generated in matching 4311 format %{ "[$reg]" %} 4312 interface(MEMORY_INTER) %{ 4313 base(0x4); // ESP 4314 index(0x4); // No Index 4315 scale(0x0); // No Scale 4316 disp($reg); // Stack Offset 4317 %} 4318 %} 4319 4320 operand stackSlotF(sRegF reg) %{ 4321 constraint(ALLOC_IN_RC(stack_slots)); 4322 // No match rule because this operand is only generated in matching 4323 format %{ "[$reg]" %} 4324 interface(MEMORY_INTER) %{ 4325 base(0x4); // ESP 4326 index(0x4); // No Index 4327 scale(0x0); // No Scale 4328 disp($reg); // Stack Offset 4329 %} 4330 %} 4331 4332 operand stackSlotD(sRegD reg) %{ 4333 constraint(ALLOC_IN_RC(stack_slots)); 4334 // No match rule because this operand is only generated in matching 4335 format %{ "[$reg]" %} 4336 interface(MEMORY_INTER) %{ 4337 base(0x4); // ESP 4338 index(0x4); // No Index 4339 scale(0x0); // No Scale 4340 disp($reg); // Stack Offset 4341 %} 4342 %} 4343 4344 operand stackSlotL(sRegL reg) %{ 4345 constraint(ALLOC_IN_RC(stack_slots)); 4346 // No match rule because this operand is only generated in matching 4347 format %{ "[$reg]" %} 4348 interface(MEMORY_INTER) %{ 4349 base(0x4); // ESP 4350 index(0x4); // No Index 4351 scale(0x0); // No Scale 4352 disp($reg); // Stack Offset 4353 %} 4354 %} 4355 4356 //----------Memory Operands - Win95 Implicit Null Variants---------------- 4357 // Indirect Memory Operand 4358 operand indirect_win95_safe(eRegP_no_EBP reg) 4359 %{ 4360 constraint(ALLOC_IN_RC(int_reg)); 4361 match(reg); 4362 4363 op_cost(100); 4364 
format %{ "[$reg]" %} 4365 interface(MEMORY_INTER) %{ 4366 base($reg); 4367 index(0x4); 4368 scale(0x0); 4369 disp(0x0); 4370 %} 4371 %} 4372 4373 // Indirect Memory Plus Short Offset Operand 4374 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off) 4375 %{ 4376 match(AddP reg off); 4377 4378 op_cost(100); 4379 format %{ "[$reg + $off]" %} 4380 interface(MEMORY_INTER) %{ 4381 base($reg); 4382 index(0x4); 4383 scale(0x0); 4384 disp($off); 4385 %} 4386 %} 4387 4388 // Indirect Memory Plus Long Offset Operand 4389 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off) 4390 %{ 4391 match(AddP reg off); 4392 4393 op_cost(100); 4394 format %{ "[$reg + $off]" %} 4395 interface(MEMORY_INTER) %{ 4396 base($reg); 4397 index(0x4); 4398 scale(0x0); 4399 disp($off); 4400 %} 4401 %} 4402 4403 // Indirect Memory Plus Index Register Plus Offset Operand 4404 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off) 4405 %{ 4406 match(AddP (AddP reg ireg) off); 4407 4408 op_cost(100); 4409 format %{"[$reg + $off + $ireg]" %} 4410 interface(MEMORY_INTER) %{ 4411 base($reg); 4412 index($ireg); 4413 scale(0x0); 4414 disp($off); 4415 %} 4416 %} 4417 4418 // Indirect Memory Times Scale Plus Index Register 4419 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale) 4420 %{ 4421 match(AddP reg (LShiftI ireg scale)); 4422 4423 op_cost(100); 4424 format %{"[$reg + $ireg << $scale]" %} 4425 interface(MEMORY_INTER) %{ 4426 base($reg); 4427 index($ireg); 4428 scale($scale); 4429 disp(0x0); 4430 %} 4431 %} 4432 4433 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4434 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale) 4435 %{ 4436 match(AddP (AddP reg (LShiftI ireg scale)) off); 4437 4438 op_cost(100); 4439 format %{"[$reg + $off + $ireg << $scale]" %} 4440 interface(MEMORY_INTER) %{ 4441 base($reg); 4442 index($ireg); 4443 scale($scale); 4444 disp($off); 4445 %} 4446 %} 4447 4448 
//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed compare; encodings are x86 condition-code nibbles)
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o");     // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares (operands swapped, so codes commute)
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for
the load 5012 DECODE : S1(2); // any decoder for FPU POP 5013 FPU : S4; 5014 MEM : S3; // any mem 5015 %} 5016 5017 // UnConditional branch 5018 pipe_class pipe_jmp( label labl ) %{ 5019 single_instruction; 5020 BR : S3; 5021 %} 5022 5023 // Conditional branch 5024 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5025 single_instruction; 5026 cr : S1(read); 5027 BR : S3; 5028 %} 5029 5030 // Allocation idiom 5031 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5032 instruction_count(1); force_serialization; 5033 fixed_latency(6); 5034 heap_ptr : S3(read); 5035 DECODE : S0(3); 5036 D0 : S2; 5037 MEM : S3; 5038 ALU : S3(2); 5039 dst : S5(write); 5040 BR : S5; 5041 %} 5042 5043 // Generic big/slow expanded idiom 5044 pipe_class pipe_slow( ) %{ 5045 instruction_count(10); multiple_bundles; force_serialization; 5046 fixed_latency(100); 5047 D0 : S0(2); 5048 MEM : S3(2); 5049 %} 5050 5051 // The real do-nothing guy 5052 pipe_class empty( ) %{ 5053 instruction_count(0); 5054 %} 5055 5056 // Define the class for the Nop node 5057 define %{ 5058 MachNop = empty; 5059 %} 5060 5061 %} 5062 5063 //----------INSTRUCTIONS------------------------------------------------------- 5064 // 5065 // match -- States which machine-independent subtree may be replaced 5066 // by this instruction. 5067 // ins_cost -- The estimated cost of this instruction is used by instruction 5068 // selection to identify a minimum cost tree of machine 5069 // instructions that matches a tree of machine-independent 5070 // instructions. 5071 // format -- A string providing the disassembly for this instruction. 5072 // The value of an instruction's operand may be inserted 5073 // by referring to it with a '$' prefix. 5074 // opcode -- Three instruction opcodes may be provided. These are referred 5075 // to within an encode class as $primary, $secondary, and $tertiary 5076 // respectively. 
//               The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
// Reverse the byte order of a 32-bit int in place.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse the byte order of a 64-bit long: swap bytes within each 32-bit
// half, then exchange the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Reverse the two bytes of an unsigned short; BSWAP leaves them in the top
// half of the register, so shift back down zero-extending.
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Reverse the two bytes of a signed short; SAR sign-extends the result.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when LZCNT is unavailable: BSR yields the index of the highest set
// bit, so nlz = 31 - index.  Zero input (BSR sets ZF) goes through the MOV -1
// path, which the NEG/ADD sequence turns into 32.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

// LZCNT sets CF when its source is all-zero (count == 32); use that to fall
// through to counting the low word and adding 32.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// BSR fallback for the long case: pick the highest set bit index across both
// halves (index+32 if it is in the high word), then nlz = 63 - index; an
// all-zero input routes through MOV -1, giving 64.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// BSF fallback: BSF finds the lowest set bit (ZF set for zero input, result
// undefined), so zero input is special-cased to 32.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// TZCNT sets CF when its source is all-zero (count == 32); use that to fall
// through to counting the high word and adding 32.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// BSF fallback for the long case: scan the low word first, then the high
// word (+32); an all-zero input yields 32 + 32 = 64 via the MOV 32 path.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    // Count each 32-bit half separately and sum.
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Count the two 32-bit halves directly from memory; the second POPCNT
    // addresses the high word at $mem+4.
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // A shift by only 7 suffices: after MOVSX8 bits 7..31 of the low word
    // all equal the sign bit.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 bits of the mask matter after the zero-extending byte
    // load, so clip the constant to 8 bits.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // A shift by 15 suffices: after MOVSX bits 15..31 all equal the sign bit.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    // A 0xFF mask folds the AND into a zero-extending byte load.
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 bits of the mask matter after the zero-extending
    // short load, so clip the constant to 16 bits.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // Arithmetic shift by 31 replicates the sign bit into the high word.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    // A 0xFF mask folds the AND into a zero-extending byte load.
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    // A 0xFFFF mask folds the AND into a zero-extending short load.
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    // With a non-negative (31-bit) mask the sign extension is always zero,
    // so the high word can simply be cleared.
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads; this form is only used when atomic access is not
    // required (see the predicate).
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load via a 64-bit XMM move, spilled to a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load straight into a GPR pair: one 64-bit XMM load,
// then extract the two halves (shift right 32 for the high word).
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 form, used when SSE2 is unavailable)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): format says MOVLPD (partial load, upper half preserved);
// presumably movdbl emits MOVLPD when UseXmmLoadAndClearUpper is off --
// confirm against MacroAssembler::movdbl.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 form, used when SSE is disabled)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_mem ); 5891 %} 5892 5893 // Load Effective Address 5894 instruct leaP8(eRegP dst, indOffset8 mem) %{ 5895 match(Set dst mem); 5896 5897 ins_cost(110); 5898 format %{ "LEA $dst,$mem" %} 5899 opcode(0x8D); 5900 ins_encode( OpcP, RegMem(dst,mem)); 5901 ins_pipe( ialu_reg_reg_fat ); 5902 %} 5903 5904 instruct leaP32(eRegP dst, indOffset32 mem) %{ 5905 match(Set dst mem); 5906 5907 ins_cost(110); 5908 format %{ "LEA $dst,$mem" %} 5909 opcode(0x8D); 5910 ins_encode( OpcP, RegMem(dst,mem)); 5911 ins_pipe( ialu_reg_reg_fat ); 5912 %} 5913 5914 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ 5915 match(Set dst mem); 5916 5917 ins_cost(110); 5918 format %{ "LEA $dst,$mem" %} 5919 opcode(0x8D); 5920 ins_encode( OpcP, RegMem(dst,mem)); 5921 ins_pipe( ialu_reg_reg_fat ); 5922 %} 5923 5924 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ 5925 match(Set dst mem); 5926 5927 ins_cost(110); 5928 format %{ "LEA $dst,$mem" %} 5929 opcode(0x8D); 5930 ins_encode( OpcP, RegMem(dst,mem)); 5931 ins_pipe( ialu_reg_reg_fat ); 5932 %} 5933 5934 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ 5935 match(Set dst mem); 5936 5937 ins_cost(110); 5938 format %{ "LEA $dst,$mem" %} 5939 opcode(0x8D); 5940 ins_encode( OpcP, RegMem(dst,mem)); 5941 ins_pipe( ialu_reg_reg_fat ); 5942 %} 5943 5944 // Load Constant 5945 instruct loadConI(rRegI dst, immI src) %{ 5946 match(Set dst src); 5947 5948 format %{ "MOV $dst,$src" %} 5949 ins_encode( LdImmI(dst, src) ); 5950 ins_pipe( ialu_reg_fat ); 5951 %} 5952 5953 // Load Constant zero 5954 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{ 5955 match(Set dst src); 5956 effect(KILL cr); 5957 5958 ins_cost(50); 5959 format %{ "XOR $dst,$dst" %} 5960 opcode(0x33); /* + rd */ 5961 ins_encode( OpcP, RegReg( dst, dst ) ); 5962 ins_pipe( ialu_reg ); 5963 %} 5964 5965 instruct loadConP(eRegP dst, immP src) %{ 5966 match(Set dst src); 5967 5968 format %{ "MOV $dst,$src" %} 5969 opcode(0xB8); /* + rd */ 5970 
ins_encode( LdImmP(dst, src) ); 5971 ins_pipe( ialu_reg_fat ); 5972 %} 5973 5974 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ 5975 match(Set dst src); 5976 effect(KILL cr); 5977 ins_cost(200); 5978 format %{ "MOV $dst.lo,$src.lo\n\t" 5979 "MOV $dst.hi,$src.hi" %} 5980 opcode(0xB8); 5981 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); 5982 ins_pipe( ialu_reg_long_fat ); 5983 %} 5984 5985 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ 5986 match(Set dst src); 5987 effect(KILL cr); 5988 ins_cost(150); 5989 format %{ "XOR $dst.lo,$dst.lo\n\t" 5990 "XOR $dst.hi,$dst.hi" %} 5991 opcode(0x33,0x33); 5992 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); 5993 ins_pipe( ialu_reg_long ); 5994 %} 5995 5996 // The instruction usage is guarded by predicate in operand immFPR(). 5997 instruct loadConFPR(regFPR dst, immFPR con) %{ 5998 match(Set dst con); 5999 ins_cost(125); 6000 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 6001 "FSTP $dst" %} 6002 ins_encode %{ 6003 __ fld_s($constantaddress($con)); 6004 __ fstp_d($dst$$reg); 6005 %} 6006 ins_pipe(fpu_reg_con); 6007 %} 6008 6009 // The instruction usage is guarded by predicate in operand immFPR0(). 6010 instruct loadConFPR0(regFPR dst, immFPR0 con) %{ 6011 match(Set dst con); 6012 ins_cost(125); 6013 format %{ "FLDZ ST\n\t" 6014 "FSTP $dst" %} 6015 ins_encode %{ 6016 __ fldz(); 6017 __ fstp_d($dst$$reg); 6018 %} 6019 ins_pipe(fpu_reg_con); 6020 %} 6021 6022 // The instruction usage is guarded by predicate in operand immFPR1(). 6023 instruct loadConFPR1(regFPR dst, immFPR1 con) %{ 6024 match(Set dst con); 6025 ins_cost(125); 6026 format %{ "FLD1 ST\n\t" 6027 "FSTP $dst" %} 6028 ins_encode %{ 6029 __ fld1(); 6030 __ fstp_d($dst$$reg); 6031 %} 6032 ins_pipe(fpu_reg_con); 6033 %} 6034 6035 // The instruction usage is guarded by predicate in operand immF(). 
// Load float constant from the constant table into an XMM register (SSE path).
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// XORPS is cheaper than a constant-table load for +0.0f.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
// x87 double constant load from the constant table.
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
// FLDZ pushes +0.0 directly — avoids the constant table.
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
// FLD1 pushes +1.0 directly — avoids the constant table.
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// Load double constant from the constant table into an XMM register (SSE2 path).
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// XORPD is cheaper than a constant-table load for +0.0.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load a long from a stack slot: two 32-bit MOVs (lo half, then hi half at +4).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot (float, x87 path)
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot (double, x87 path)
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// AllocatePrefetchInstr selects the variant: 0=NTA, 1=T0, 2=T2, 3=PREFETCHW.

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short (0x66 operand-size prefix selects the 16-bit form)
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long (non-atomic two-MOV form; the atomic variants are below)
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer (only the low half is written)
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 atomic volatile long store from a stack slot: a single 64-bit
// MOVSD through a temporary XMM register is atomic.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 atomic volatile long store from a GPR pair: pack lo/hi halves
// into one XMM register with PUNPCKLDQ, then store 64 bits at once.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate (guarded by UseStoreImmI16: the 16-bit
// immediate form stalls some pipelines, so it is optional)
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);               /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
// Store Pointer Immediate (nulls / constant oops; no card mark — see
// the comment preceding this instruct).
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate (byte store into the card table)
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87 path; src must already be at top-of-stack: regDPR1)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86 — the x87 store itself narrows to
// 64-bit precision, so the explicit RoundDouble folds into the store.
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float (x87 path; src must be top-of-stack: regFPR1)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86 — the 32-bit x87 store narrows, so
// the explicit RoundFloat folds into the store.
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store of a double narrowed to float: the 32-bit x87 store performs the
// D2F conversion/rounding as a side effect, so ConvD2F folds into it.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
// Store float constant as a raw 32-bit immediate (bit pattern of the float).
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot (two MOVs: lo half, then hi half at +4)
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors. On x86 (TSO) only StoreLoad needs a real
// instruction; the other flavors are empty encodings (ordering hints for
// the compiler only).

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier: emitted as a locked add to the stack (cheaper
// than MFENCE) on MP systems; nothing on UP.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elided StoreLoad barrier: a preceding locked instruction already
// provides the fence (see Matcher::post_store_load_barrier).
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// CastX2P is a no-op reinterpretation; tying src and dst to EAX makes the
// register allocator coalesce them so no code is emitted.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move, emulated with a short branch on pre-P6 CPUs that
// lack the CMOV instruction.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned-compare flavor of the branch-emulated cmove above.
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Real CMOV (P6+).
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Carry-flag-only unsigned compare variant; delegates to cmovI_regU.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move with a memory source (folds the load into the CMOV).
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move of a pointer.
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move using the x87 FCMOV instruction (double, unsigned cc).
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move using the x87 FCMOV instruction (float, unsigned cc).
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed case is emulated with a branch around an x87 register move.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2 — emulate with a branch around MOVSS.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
// NOTE(review): format string says "# float" but this is the double
// flavor (MOVSD/CMoveD) — cosmetic mismatch in the debug listing only.
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
// NOTE(review): format string says "# float" but this is the double
// flavor — cosmetic mismatch in the debug listing only.
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: one CMOV per 32-bit half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00);         /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add 1 via the one-byte INC encoding (0x40+rd); guarded by UseIncDec
// since INC/DEC partially update EFLAGS and stall some pipelines.
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40);               /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA — does not clobber EFLAGS.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D);               /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D);               /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add -1 via the one-byte DEC encoding (0x48+rd); see incI_eReg note.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48);               /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00);          /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add with a memory operand (folds the load). NOTE: this definition runs
// past the end of the visible chunk; only its opening lines appear here.
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src"
%} 7102 opcode(0x03); 7103 ins_encode( OpcP, RegMem( dst, src) ); 7104 ins_pipe( ialu_reg_mem ); 7105 %} 7106 7107 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7108 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7109 effect(KILL cr); 7110 7111 ins_cost(150); 7112 format %{ "ADD $dst,$src" %} 7113 opcode(0x01); /* Opcode 01 /r */ 7114 ins_encode( OpcP, RegMem( src, dst ) ); 7115 ins_pipe( ialu_mem_reg ); 7116 %} 7117 7118 // Add Memory with Immediate 7119 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7120 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7121 effect(KILL cr); 7122 7123 ins_cost(125); 7124 format %{ "ADD $dst,$src" %} 7125 opcode(0x81); /* Opcode 81 /0 id */ 7126 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7127 ins_pipe( ialu_mem_imm ); 7128 %} 7129 7130 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7131 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7132 effect(KILL cr); 7133 7134 ins_cost(125); 7135 format %{ "INC $dst" %} 7136 opcode(0xFF); /* Opcode FF /0 */ 7137 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7138 ins_pipe( ialu_mem_imm ); 7139 %} 7140 7141 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7142 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7143 effect(KILL cr); 7144 7145 ins_cost(125); 7146 format %{ "DEC $dst" %} 7147 opcode(0xFF); /* Opcode FF /1 */ 7148 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7149 ins_pipe( ialu_mem_imm ); 7150 %} 7151 7152 7153 instruct checkCastPP( eRegP dst ) %{ 7154 match(Set dst (CheckCastPP dst)); 7155 7156 size(0); 7157 format %{ "#checkcastPP of $dst" %} 7158 ins_encode( /*empty encoding*/ ); 7159 ins_pipe( empty ); 7160 %} 7161 7162 instruct castPP( eRegP dst ) %{ 7163 match(Set dst (CastPP dst)); 7164 format %{ "#castPP of $dst" %} 7165 ins_encode( /*empty encoding*/ ); 7166 ins_pipe( empty ); 7167 %} 7168 7169 instruct castII( rRegI dst ) %{ 7170 match(Set dst (CastII dst)); 7171 format %{ "#castII of $dst" %} 
7172 ins_encode( /*empty encoding*/ ); 7173 ins_cost(0); 7174 ins_pipe( empty ); 7175 %} 7176 7177 7178 // Load-locked - same as a regular pointer load when used with compare-swap 7179 instruct loadPLocked(eRegP dst, memory mem) %{ 7180 match(Set dst (LoadPLocked mem)); 7181 7182 ins_cost(125); 7183 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7184 opcode(0x8B); 7185 ins_encode( OpcP, RegMem(dst,mem)); 7186 ins_pipe( ialu_reg_mem ); 7187 %} 7188 7189 // Conditional-store of the updated heap-top. 7190 // Used during allocation of the shared heap. 7191 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7192 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7193 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7194 // EAX is killed if there is contention, but then it's also unused. 7195 // In the common case of no contention, EAX holds the new oop address. 7196 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7197 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7198 ins_pipe( pipe_cmpxchg ); 7199 %} 7200 7201 // Conditional-store of an int value. 7202 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 7203 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ 7204 match(Set cr (StoreIConditional mem (Binary oldval newval))); 7205 effect(KILL oldval); 7206 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} 7207 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); 7208 ins_pipe( pipe_cmpxchg ); 7209 %} 7210 7211 // Conditional-store of a long value. 7212 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high order word of the new value to store but
    //       our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS via LOCK CMPXCHG8B; requires cx8 support.  Result is
// materialized as a 0/1 int in $res from the ZF flag.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS via LOCK CMPXCHG; oldval is pinned to EAX by the instruction.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Integer CAS via LOCK CMPXCHG; oldval is pinned to EAX by the instruction.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI whose result is unused: a plain LOCK ADD suffices (no XADD,
// no result register needed).
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add: LOCK XADD leaves the old value in $newval.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange of an int.  XCHG with a memory operand is implicitly locked.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange of a pointer (32-bit, so same XCHGL as the int form).
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract immediate from register (81 /5, or sign-extended 8-bit form).
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract memory operand from register.
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Subtract register from memory (read-modify-write).
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Two's-complement negate: 0 - dst matched to a single NEG.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low word of EDX:EAX only; used as an
// operand feeder for the mulI_imm_high/mulI_imm_RShift_high patterns below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply
// by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned int-to-long multiply: both operands zero-extended via the mask.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// The CMP/JNE prologue guards the min_jint / -1 case, which would otherwise
// raise a hardware overflow exception from IDIV.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long: punted to the SharedRuntime::ldiv runtime stub.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register: remainder lands in EDX after IDIV.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long: punted to the SharedRuntime::lrem runtime stub.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrough for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions

// ANDN: dst = ~src1 & src2, matched from the (x ^ -1) & y ideal shape.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// ANDN with a memory second operand.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from (0 - x) & x.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// BLSI with a memory operand (the same load feeds both inputs).
instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to lowest set bit, matched from (x + -1) ^ x.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// BLSMSK with a memory operand.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matched from (x + -1) & x.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// BLSR with a memory operand.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst
(OrI dst src)); 8160 effect(KILL cr); 8161 8162 size(2); 8163 format %{ "OR $dst,$src" %} 8164 opcode(0x0B); 8165 ins_encode( OpcP, RegReg( dst, src) ); 8166 ins_pipe( ialu_reg_reg ); 8167 %} 8168 8169 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8170 match(Set dst (OrI dst (CastP2X src))); 8171 effect(KILL cr); 8172 8173 size(2); 8174 format %{ "OR $dst,$src" %} 8175 opcode(0x0B); 8176 ins_encode( OpcP, RegReg( dst, src) ); 8177 ins_pipe( ialu_reg_reg ); 8178 %} 8179 8180 8181 // Or Register with Immediate 8182 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8183 match(Set dst (OrI dst src)); 8184 effect(KILL cr); 8185 8186 format %{ "OR $dst,$src" %} 8187 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8188 // ins_encode( RegImm( dst, src) ); 8189 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8190 ins_pipe( ialu_reg ); 8191 %} 8192 8193 // Or Register with Memory 8194 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8195 match(Set dst (OrI dst (LoadI src))); 8196 effect(KILL cr); 8197 8198 ins_cost(125); 8199 format %{ "OR $dst,$src" %} 8200 opcode(0x0B); 8201 ins_encode( OpcP, RegMem( dst, src) ); 8202 ins_pipe( ialu_reg_mem ); 8203 %} 8204 8205 // Or Memory with Register 8206 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8207 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8208 effect(KILL cr); 8209 8210 ins_cost(150); 8211 format %{ "OR $dst,$src" %} 8212 opcode(0x09); /* Opcode 09 /r */ 8213 ins_encode( OpcP, RegMem( src, dst ) ); 8214 ins_pipe( ialu_mem_reg ); 8215 %} 8216 8217 // Or Memory with Immediate 8218 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8219 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8220 effect(KILL cr); 8221 8222 ins_cost(125); 8223 format %{ "OR $dst,$src" %} 8224 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8225 // ins_encode( MemImm( dst, src) ); 8226 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8227 ins_pipe( ialu_mem_imm ); 
8228 %} 8229 8230 // ROL/ROR 8231 // ROL expand 8232 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8233 effect(USE_DEF dst, USE shift, KILL cr); 8234 8235 format %{ "ROL $dst, $shift" %} 8236 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8237 ins_encode( OpcP, RegOpc( dst )); 8238 ins_pipe( ialu_reg ); 8239 %} 8240 8241 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8242 effect(USE_DEF dst, USE shift, KILL cr); 8243 8244 format %{ "ROL $dst, $shift" %} 8245 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8246 ins_encode( RegOpcImm(dst, shift) ); 8247 ins_pipe(ialu_reg); 8248 %} 8249 8250 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8251 effect(USE_DEF dst, USE shift, KILL cr); 8252 8253 format %{ "ROL $dst, $shift" %} 8254 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8255 ins_encode(OpcP, RegOpc(dst)); 8256 ins_pipe( ialu_reg_reg ); 8257 %} 8258 // end of ROL expand 8259 8260 // ROL 32bit by one once 8261 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8262 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8263 8264 expand %{ 8265 rolI_eReg_imm1(dst, lshift, cr); 8266 %} 8267 %} 8268 8269 // ROL 32bit var by imm8 once 8270 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8271 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8272 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8273 8274 expand %{ 8275 rolI_eReg_imm8(dst, lshift, cr); 8276 %} 8277 %} 8278 8279 // ROL 32bit var by var once 8280 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8281 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8282 8283 expand %{ 8284 rolI_eReg_CL(dst, shift, cr); 8285 %} 8286 %} 8287 8288 // ROL 32bit var by var once 8289 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8290 match(Set dst ( OrI (LShiftI dst shift) (URShiftI 
dst (SubI c32 shift)))); 8291 8292 expand %{ 8293 rolI_eReg_CL(dst, shift, cr); 8294 %} 8295 %} 8296 8297 // ROR expand 8298 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8299 effect(USE_DEF dst, USE shift, KILL cr); 8300 8301 format %{ "ROR $dst, $shift" %} 8302 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8303 ins_encode( OpcP, RegOpc( dst ) ); 8304 ins_pipe( ialu_reg ); 8305 %} 8306 8307 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8308 effect (USE_DEF dst, USE shift, KILL cr); 8309 8310 format %{ "ROR $dst, $shift" %} 8311 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8312 ins_encode( RegOpcImm(dst, shift) ); 8313 ins_pipe( ialu_reg ); 8314 %} 8315 8316 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8317 effect(USE_DEF dst, USE shift, KILL cr); 8318 8319 format %{ "ROR $dst, $shift" %} 8320 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8321 ins_encode(OpcP, RegOpc(dst)); 8322 ins_pipe( ialu_reg_reg ); 8323 %} 8324 // end of ROR expand 8325 8326 // ROR right once 8327 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8328 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8329 8330 expand %{ 8331 rorI_eReg_imm1(dst, rshift, cr); 8332 %} 8333 %} 8334 8335 // ROR 32bit by immI8 once 8336 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8337 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8338 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8339 8340 expand %{ 8341 rorI_eReg_imm8(dst, rshift, cr); 8342 %} 8343 %} 8344 8345 // ROR 32bit var by var once 8346 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8347 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8348 8349 expand %{ 8350 rorI_eReg_CL(dst, shift, cr); 8351 %} 8352 %} 8353 8354 // ROR 32bit var by var once 8355 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, 
eFlagsReg cr) %{ 8356 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8357 8358 expand %{ 8359 rorI_eReg_CL(dst, shift, cr); 8360 %} 8361 %} 8362 8363 // Xor Instructions 8364 // Xor Register with Register 8365 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8366 match(Set dst (XorI dst src)); 8367 effect(KILL cr); 8368 8369 size(2); 8370 format %{ "XOR $dst,$src" %} 8371 opcode(0x33); 8372 ins_encode( OpcP, RegReg( dst, src) ); 8373 ins_pipe( ialu_reg_reg ); 8374 %} 8375 8376 // Xor Register with Immediate -1 8377 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8378 match(Set dst (XorI dst imm)); 8379 8380 size(2); 8381 format %{ "NOT $dst" %} 8382 ins_encode %{ 8383 __ notl($dst$$Register); 8384 %} 8385 ins_pipe( ialu_reg ); 8386 %} 8387 8388 // Xor Register with Immediate 8389 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8390 match(Set dst (XorI dst src)); 8391 effect(KILL cr); 8392 8393 format %{ "XOR $dst,$src" %} 8394 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8395 // ins_encode( RegImm( dst, src) ); 8396 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8397 ins_pipe( ialu_reg ); 8398 %} 8399 8400 // Xor Register with Memory 8401 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8402 match(Set dst (XorI dst (LoadI src))); 8403 effect(KILL cr); 8404 8405 ins_cost(125); 8406 format %{ "XOR $dst,$src" %} 8407 opcode(0x33); 8408 ins_encode( OpcP, RegMem(dst, src) ); 8409 ins_pipe( ialu_reg_mem ); 8410 %} 8411 8412 // Xor Memory with Register 8413 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8414 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8415 effect(KILL cr); 8416 8417 ins_cost(150); 8418 format %{ "XOR $dst,$src" %} 8419 opcode(0x31); /* Opcode 31 /r */ 8420 ins_encode( OpcP, RegMem( src, dst ) ); 8421 ins_pipe( ialu_mem_reg ); 8422 %} 8423 8424 // Xor Memory with Immediate 8425 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8426 match(Set dst (StoreI 
dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Plain register copy; used as the first half of the convI2B expansion.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG/ADC trick: after the copy, NEG sets CF iff src != 0, and
// ADC dst,src then yields a non-zero value exactly when src != 0.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B for int inputs: expands into the two instructs above.
instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer flavor of movI_nocopy for the convP2B expansion.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer flavor of ci2b (same NEG/ADC sequence).
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B for pointer inputs.
instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// CmpLTMask: dst = (p < q) ? -1 : 0.  SETlt produces 0/1, NEG turns
// that into 0/-1.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Straight-line sequence: no branch, so no local Label is needed.
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: the sign bit replicated by an arithmetic
// shift gives the -1/0 mask directly.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// Fused form of ((p<q ? -1 : 0) & y) + (p - q): compute p-q, then
// conditionally add y when the subtraction went negative.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// Fused form of (p<q ? -1 : 0) & y: keep y when p<q, else clear it.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
8575 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ 8576 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 8577 */ 8578 //----------Overflow Math Instructions----------------------------------------- 8579 8580 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8581 %{ 8582 match(Set cr (OverflowAddI op1 op2)); 8583 effect(DEF cr, USE_KILL op1, USE op2); 8584 8585 format %{ "ADD $op1, $op2\t# overflow check int" %} 8586 8587 ins_encode %{ 8588 __ addl($op1$$Register, $op2$$Register); 8589 %} 8590 ins_pipe(ialu_reg_reg); 8591 %} 8592 8593 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) 8594 %{ 8595 match(Set cr (OverflowAddI op1 op2)); 8596 effect(DEF cr, USE_KILL op1, USE op2); 8597 8598 format %{ "ADD $op1, $op2\t# overflow check int" %} 8599 8600 ins_encode %{ 8601 __ addl($op1$$Register, $op2$$constant); 8602 %} 8603 ins_pipe(ialu_reg_reg); 8604 %} 8605 8606 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) 8607 %{ 8608 match(Set cr (OverflowSubI op1 op2)); 8609 8610 format %{ "CMP $op1, $op2\t# overflow check int" %} 8611 ins_encode %{ 8612 __ cmpl($op1$$Register, $op2$$Register); 8613 %} 8614 ins_pipe(ialu_reg_reg); 8615 %} 8616 8617 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) 8618 %{ 8619 match(Set cr (OverflowSubI op1 op2)); 8620 8621 format %{ "CMP $op1, $op2\t# overflow check int" %} 8622 ins_encode %{ 8623 __ cmpl($op1$$Register, $op2$$constant); 8624 %} 8625 ins_pipe(ialu_reg_reg); 8626 %} 8627 8628 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2) 8629 %{ 8630 match(Set cr (OverflowSubI zero op2)); 8631 effect(DEF cr, USE_KILL op2); 8632 8633 format %{ "NEG $op2\t# overflow check int" %} 8634 ins_encode %{ 8635 __ negl($op2$$Register); 8636 %} 8637 ins_pipe(ialu_reg_reg); 8638 %} 8639 8640 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8641 %{ 8642 match(Set cr (OverflowMulI op1 op2)); 8643 
effect(DEF cr, USE_KILL op1, USE op2); 8644 8645 format %{ "IMUL $op1, $op2\t# overflow check int" %} 8646 ins_encode %{ 8647 __ imull($op1$$Register, $op2$$Register); 8648 %} 8649 ins_pipe(ialu_reg_reg_alu0); 8650 %} 8651 8652 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) 8653 %{ 8654 match(Set cr (OverflowMulI op1 op2)); 8655 effect(DEF cr, TEMP tmp, USE op1, USE op2); 8656 8657 format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} 8658 ins_encode %{ 8659 __ imull($tmp$$Register, $op1$$Register, $op2$$constant); 8660 %} 8661 ins_pipe(ialu_reg_reg_alu0); 8662 %} 8663 8664 //----------Long Instructions------------------------------------------------ 8665 // Add Long Register with Register 8666 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8667 match(Set dst (AddL dst src)); 8668 effect(KILL cr); 8669 ins_cost(200); 8670 format %{ "ADD $dst.lo,$src.lo\n\t" 8671 "ADC $dst.hi,$src.hi" %} 8672 opcode(0x03, 0x13); 8673 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8674 ins_pipe( ialu_reg_reg_long ); 8675 %} 8676 8677 // Add Long Register with Immediate 8678 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8679 match(Set dst (AddL dst src)); 8680 effect(KILL cr); 8681 format %{ "ADD $dst.lo,$src.lo\n\t" 8682 "ADC $dst.hi,$src.hi" %} 8683 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */ 8684 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8685 ins_pipe( ialu_reg_long ); 8686 %} 8687 8688 // Add Long Register with Memory 8689 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8690 match(Set dst (AddL dst (LoadL mem))); 8691 effect(KILL cr); 8692 ins_cost(125); 8693 format %{ "ADD $dst.lo,$mem\n\t" 8694 "ADC $dst.hi,$mem+4" %} 8695 opcode(0x03, 0x13); 8696 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8697 ins_pipe( ialu_reg_long_mem ); 8698 %} 8699 8700 // Subtract Long Register with Register. 
8701 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8702 match(Set dst (SubL dst src)); 8703 effect(KILL cr); 8704 ins_cost(200); 8705 format %{ "SUB $dst.lo,$src.lo\n\t" 8706 "SBB $dst.hi,$src.hi" %} 8707 opcode(0x2B, 0x1B); 8708 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8709 ins_pipe( ialu_reg_reg_long ); 8710 %} 8711 8712 // Subtract Long Register with Immediate 8713 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8714 match(Set dst (SubL dst src)); 8715 effect(KILL cr); 8716 format %{ "SUB $dst.lo,$src.lo\n\t" 8717 "SBB $dst.hi,$src.hi" %} 8718 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8719 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8720 ins_pipe( ialu_reg_long ); 8721 %} 8722 8723 // Subtract Long Register with Memory 8724 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8725 match(Set dst (SubL dst (LoadL mem))); 8726 effect(KILL cr); 8727 ins_cost(125); 8728 format %{ "SUB $dst.lo,$mem\n\t" 8729 "SBB $dst.hi,$mem+4" %} 8730 opcode(0x2B, 0x1B); 8731 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8732 ins_pipe( ialu_reg_long_mem ); 8733 %} 8734 8735 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8736 match(Set dst (SubL zero dst)); 8737 effect(KILL cr); 8738 ins_cost(300); 8739 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8740 ins_encode( neg_long(dst) ); 8741 ins_pipe( ialu_reg_reg_long ); 8742 %} 8743 8744 // And Long Register with Register 8745 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8746 match(Set dst (AndL dst src)); 8747 effect(KILL cr); 8748 format %{ "AND $dst.lo,$src.lo\n\t" 8749 "AND $dst.hi,$src.hi" %} 8750 opcode(0x23,0x23); 8751 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8752 ins_pipe( ialu_reg_reg_long ); 8753 %} 8754 8755 // And Long Register with Immediate 8756 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8757 match(Set dst (AndL dst src)); 8758 effect(KILL 
cr); 8759 format %{ "AND $dst.lo,$src.lo\n\t" 8760 "AND $dst.hi,$src.hi" %} 8761 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8762 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8763 ins_pipe( ialu_reg_long ); 8764 %} 8765 8766 // And Long Register with Memory 8767 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8768 match(Set dst (AndL dst (LoadL mem))); 8769 effect(KILL cr); 8770 ins_cost(125); 8771 format %{ "AND $dst.lo,$mem\n\t" 8772 "AND $dst.hi,$mem+4" %} 8773 opcode(0x23, 0x23); 8774 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8775 ins_pipe( ialu_reg_long_mem ); 8776 %} 8777 8778 // BMI1 instructions 8779 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8780 match(Set dst (AndL (XorL src1 minus_1) src2)); 8781 predicate(UseBMI1Instructions); 8782 effect(KILL cr, TEMP dst); 8783 8784 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 8785 "ANDNL $dst.hi, $src1.hi, $src2.hi" 8786 %} 8787 8788 ins_encode %{ 8789 Register Rdst = $dst$$Register; 8790 Register Rsrc1 = $src1$$Register; 8791 Register Rsrc2 = $src2$$Register; 8792 __ andnl(Rdst, Rsrc1, Rsrc2); 8793 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 8794 %} 8795 ins_pipe(ialu_reg_reg_long); 8796 %} 8797 8798 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 8799 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 8800 predicate(UseBMI1Instructions); 8801 effect(KILL cr, TEMP dst); 8802 8803 ins_cost(125); 8804 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 8805 "ANDNL $dst.hi, $src1.hi, $src2+4" 8806 %} 8807 8808 ins_encode %{ 8809 Register Rdst = $dst$$Register; 8810 Register Rsrc1 = $src1$$Register; 8811 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 8812 8813 __ andnl(Rdst, Rsrc1, $src2$$Address); 8814 __ andnl(HIGH_FROM_LOW(Rdst), 
HIGH_FROM_LOW(Rsrc1), src2_hi); 8815 %} 8816 ins_pipe(ialu_reg_mem); 8817 %} 8818 8819 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 8820 match(Set dst (AndL (SubL imm_zero src) src)); 8821 predicate(UseBMI1Instructions); 8822 effect(KILL cr, TEMP dst); 8823 8824 format %{ "MOVL $dst.hi, 0\n\t" 8825 "BLSIL $dst.lo, $src.lo\n\t" 8826 "JNZ done\n\t" 8827 "BLSIL $dst.hi, $src.hi\n" 8828 "done:" 8829 %} 8830 8831 ins_encode %{ 8832 Label done; 8833 Register Rdst = $dst$$Register; 8834 Register Rsrc = $src$$Register; 8835 __ movl(HIGH_FROM_LOW(Rdst), 0); 8836 __ blsil(Rdst, Rsrc); 8837 __ jccb(Assembler::notZero, done); 8838 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8839 __ bind(done); 8840 %} 8841 ins_pipe(ialu_reg); 8842 %} 8843 8844 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 8845 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 8846 predicate(UseBMI1Instructions); 8847 effect(KILL cr, TEMP dst); 8848 8849 ins_cost(125); 8850 format %{ "MOVL $dst.hi, 0\n\t" 8851 "BLSIL $dst.lo, $src\n\t" 8852 "JNZ done\n\t" 8853 "BLSIL $dst.hi, $src+4\n" 8854 "done:" 8855 %} 8856 8857 ins_encode %{ 8858 Label done; 8859 Register Rdst = $dst$$Register; 8860 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8861 8862 __ movl(HIGH_FROM_LOW(Rdst), 0); 8863 __ blsil(Rdst, $src$$Address); 8864 __ jccb(Assembler::notZero, done); 8865 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 8866 __ bind(done); 8867 %} 8868 ins_pipe(ialu_reg_mem); 8869 %} 8870 8871 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8872 %{ 8873 match(Set dst (XorL (AddL src minus_1) src)); 8874 predicate(UseBMI1Instructions); 8875 effect(KILL cr, TEMP dst); 8876 8877 format %{ "MOVL $dst.hi, 0\n\t" 8878 "BLSMSKL $dst.lo, $src.lo\n\t" 8879 "JNC done\n\t" 8880 "BLSMSKL $dst.hi, $src.hi\n" 8881 "done:" 8882 %} 8883 8884 ins_encode %{ 8885 Label done; 
8886 Register Rdst = $dst$$Register; 8887 Register Rsrc = $src$$Register; 8888 __ movl(HIGH_FROM_LOW(Rdst), 0); 8889 __ blsmskl(Rdst, Rsrc); 8890 __ jccb(Assembler::carryClear, done); 8891 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8892 __ bind(done); 8893 %} 8894 8895 ins_pipe(ialu_reg); 8896 %} 8897 8898 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8899 %{ 8900 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 8901 predicate(UseBMI1Instructions); 8902 effect(KILL cr, TEMP dst); 8903 8904 ins_cost(125); 8905 format %{ "MOVL $dst.hi, 0\n\t" 8906 "BLSMSKL $dst.lo, $src\n\t" 8907 "JNC done\n\t" 8908 "BLSMSKL $dst.hi, $src+4\n" 8909 "done:" 8910 %} 8911 8912 ins_encode %{ 8913 Label done; 8914 Register Rdst = $dst$$Register; 8915 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8916 8917 __ movl(HIGH_FROM_LOW(Rdst), 0); 8918 __ blsmskl(Rdst, $src$$Address); 8919 __ jccb(Assembler::carryClear, done); 8920 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 8921 __ bind(done); 8922 %} 8923 8924 ins_pipe(ialu_reg_mem); 8925 %} 8926 8927 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8928 %{ 8929 match(Set dst (AndL (AddL src minus_1) src) ); 8930 predicate(UseBMI1Instructions); 8931 effect(KILL cr, TEMP dst); 8932 8933 format %{ "MOVL $dst.hi, $src.hi\n\t" 8934 "BLSRL $dst.lo, $src.lo\n\t" 8935 "JNC done\n\t" 8936 "BLSRL $dst.hi, $src.hi\n" 8937 "done:" 8938 %} 8939 8940 ins_encode %{ 8941 Label done; 8942 Register Rdst = $dst$$Register; 8943 Register Rsrc = $src$$Register; 8944 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8945 __ blsrl(Rdst, Rsrc); 8946 __ jccb(Assembler::carryClear, done); 8947 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8948 __ bind(done); 8949 %} 8950 8951 ins_pipe(ialu_reg); 8952 %} 8953 8954 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8955 %{ 8956 match(Set dst (AndL 
(AddL (LoadL src) minus_1) (LoadL src) )); 8957 predicate(UseBMI1Instructions); 8958 effect(KILL cr, TEMP dst); 8959 8960 ins_cost(125); 8961 format %{ "MOVL $dst.hi, $src+4\n\t" 8962 "BLSRL $dst.lo, $src\n\t" 8963 "JNC done\n\t" 8964 "BLSRL $dst.hi, $src+4\n" 8965 "done:" 8966 %} 8967 8968 ins_encode %{ 8969 Label done; 8970 Register Rdst = $dst$$Register; 8971 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8972 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 8973 __ blsrl(Rdst, $src$$Address); 8974 __ jccb(Assembler::carryClear, done); 8975 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 8976 __ bind(done); 8977 %} 8978 8979 ins_pipe(ialu_reg_mem); 8980 %} 8981 8982 // Or Long Register with Register 8983 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8984 match(Set dst (OrL dst src)); 8985 effect(KILL cr); 8986 format %{ "OR $dst.lo,$src.lo\n\t" 8987 "OR $dst.hi,$src.hi" %} 8988 opcode(0x0B,0x0B); 8989 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8990 ins_pipe( ialu_reg_reg_long ); 8991 %} 8992 8993 // Or Long Register with Immediate 8994 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8995 match(Set dst (OrL dst src)); 8996 effect(KILL cr); 8997 format %{ "OR $dst.lo,$src.lo\n\t" 8998 "OR $dst.hi,$src.hi" %} 8999 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9000 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9001 ins_pipe( ialu_reg_long ); 9002 %} 9003 9004 // Or Long Register with Memory 9005 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9006 match(Set dst (OrL dst (LoadL mem))); 9007 effect(KILL cr); 9008 ins_cost(125); 9009 format %{ "OR $dst.lo,$mem\n\t" 9010 "OR $dst.hi,$mem+4" %} 9011 opcode(0x0B,0x0B); 9012 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9013 ins_pipe( ialu_reg_long_mem ); 9014 %} 9015 9016 // Xor Long Register with Register 9017 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9018 
match(Set dst (XorL dst src)); 9019 effect(KILL cr); 9020 format %{ "XOR $dst.lo,$src.lo\n\t" 9021 "XOR $dst.hi,$src.hi" %} 9022 opcode(0x33,0x33); 9023 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9024 ins_pipe( ialu_reg_reg_long ); 9025 %} 9026 9027 // Xor Long Register with Immediate -1 9028 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9029 match(Set dst (XorL dst imm)); 9030 format %{ "NOT $dst.lo\n\t" 9031 "NOT $dst.hi" %} 9032 ins_encode %{ 9033 __ notl($dst$$Register); 9034 __ notl(HIGH_FROM_LOW($dst$$Register)); 9035 %} 9036 ins_pipe( ialu_reg_long ); 9037 %} 9038 9039 // Xor Long Register with Immediate 9040 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9041 match(Set dst (XorL dst src)); 9042 effect(KILL cr); 9043 format %{ "XOR $dst.lo,$src.lo\n\t" 9044 "XOR $dst.hi,$src.hi" %} 9045 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9046 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9047 ins_pipe( ialu_reg_long ); 9048 %} 9049 9050 // Xor Long Register with Memory 9051 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9052 match(Set dst (XorL dst (LoadL mem))); 9053 effect(KILL cr); 9054 ins_cost(125); 9055 format %{ "XOR $dst.lo,$mem\n\t" 9056 "XOR $dst.hi,$mem+4" %} 9057 opcode(0x33,0x33); 9058 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9059 ins_pipe( ialu_reg_long_mem ); 9060 %} 9061 9062 // Shift Left Long by 1 9063 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9064 predicate(UseNewLongLShift); 9065 match(Set dst (LShiftL dst cnt)); 9066 effect(KILL cr); 9067 ins_cost(100); 9068 format %{ "ADD $dst.lo,$dst.lo\n\t" 9069 "ADC $dst.hi,$dst.hi" %} 9070 ins_encode %{ 9071 __ addl($dst$$Register,$dst$$Register); 9072 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9073 %} 9074 ins_pipe( ialu_reg_long ); 9075 %} 9076 9077 // Shift Left Long by 2 9078 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9079 
predicate(UseNewLongLShift); 9080 match(Set dst (LShiftL dst cnt)); 9081 effect(KILL cr); 9082 ins_cost(100); 9083 format %{ "ADD $dst.lo,$dst.lo\n\t" 9084 "ADC $dst.hi,$dst.hi\n\t" 9085 "ADD $dst.lo,$dst.lo\n\t" 9086 "ADC $dst.hi,$dst.hi" %} 9087 ins_encode %{ 9088 __ addl($dst$$Register,$dst$$Register); 9089 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9090 __ addl($dst$$Register,$dst$$Register); 9091 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9092 %} 9093 ins_pipe( ialu_reg_long ); 9094 %} 9095 9096 // Shift Left Long by 3 9097 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9098 predicate(UseNewLongLShift); 9099 match(Set dst (LShiftL dst cnt)); 9100 effect(KILL cr); 9101 ins_cost(100); 9102 format %{ "ADD $dst.lo,$dst.lo\n\t" 9103 "ADC $dst.hi,$dst.hi\n\t" 9104 "ADD $dst.lo,$dst.lo\n\t" 9105 "ADC $dst.hi,$dst.hi\n\t" 9106 "ADD $dst.lo,$dst.lo\n\t" 9107 "ADC $dst.hi,$dst.hi" %} 9108 ins_encode %{ 9109 __ addl($dst$$Register,$dst$$Register); 9110 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9111 __ addl($dst$$Register,$dst$$Register); 9112 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9113 __ addl($dst$$Register,$dst$$Register); 9114 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9115 %} 9116 ins_pipe( ialu_reg_long ); 9117 %} 9118 9119 // Shift Left Long by 1-31 9120 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9121 match(Set dst (LShiftL dst cnt)); 9122 effect(KILL cr); 9123 ins_cost(200); 9124 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9125 "SHL $dst.lo,$cnt" %} 9126 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9127 ins_encode( move_long_small_shift(dst,cnt) ); 9128 ins_pipe( ialu_reg_long ); 9129 %} 9130 9131 // Shift Left Long by 32-63 9132 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9133 match(Set dst (LShiftL dst cnt)); 9134 effect(KILL cr); 9135 ins_cost(300); 9136 
format %{ "MOV $dst.hi,$dst.lo\n" 9137 "\tSHL $dst.hi,$cnt-32\n" 9138 "\tXOR $dst.lo,$dst.lo" %} 9139 opcode(0xC1, 0x4); /* C1 /4 ib */ 9140 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9141 ins_pipe( ialu_reg_long ); 9142 %} 9143 9144 // Shift Left Long by variable 9145 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9146 match(Set dst (LShiftL dst shift)); 9147 effect(KILL cr); 9148 ins_cost(500+200); 9149 size(17); 9150 format %{ "TEST $shift,32\n\t" 9151 "JEQ,s small\n\t" 9152 "MOV $dst.hi,$dst.lo\n\t" 9153 "XOR $dst.lo,$dst.lo\n" 9154 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9155 "SHL $dst.lo,$shift" %} 9156 ins_encode( shift_left_long( dst, shift ) ); 9157 ins_pipe( pipe_slow ); 9158 %} 9159 9160 // Shift Right Long by 1-31 9161 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9162 match(Set dst (URShiftL dst cnt)); 9163 effect(KILL cr); 9164 ins_cost(200); 9165 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9166 "SHR $dst.hi,$cnt" %} 9167 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9168 ins_encode( move_long_small_shift(dst,cnt) ); 9169 ins_pipe( ialu_reg_long ); 9170 %} 9171 9172 // Shift Right Long by 32-63 9173 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9174 match(Set dst (URShiftL dst cnt)); 9175 effect(KILL cr); 9176 ins_cost(300); 9177 format %{ "MOV $dst.lo,$dst.hi\n" 9178 "\tSHR $dst.lo,$cnt-32\n" 9179 "\tXOR $dst.hi,$dst.hi" %} 9180 opcode(0xC1, 0x5); /* C1 /5 ib */ 9181 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9182 ins_pipe( ialu_reg_long ); 9183 %} 9184 9185 // Shift Right Long by variable 9186 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9187 match(Set dst (URShiftL dst shift)); 9188 effect(KILL cr); 9189 ins_cost(600); 9190 size(17); 9191 format %{ "TEST $shift,32\n\t" 9192 "JEQ,s small\n\t" 9193 "MOV $dst.lo,$dst.hi\n\t" 9194 "XOR $dst.hi,$dst.hi\n" 9195 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9196 "SHR $dst.hi,$shift" %} 9197 ins_encode( 
shift_right_long( dst, shift ) ); 9198 ins_pipe( pipe_slow ); 9199 %} 9200 9201 // Shift Right Long by 1-31 9202 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9203 match(Set dst (RShiftL dst cnt)); 9204 effect(KILL cr); 9205 ins_cost(200); 9206 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9207 "SAR $dst.hi,$cnt" %} 9208 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9209 ins_encode( move_long_small_shift(dst,cnt) ); 9210 ins_pipe( ialu_reg_long ); 9211 %} 9212 9213 // Shift Right Long by 32-63 9214 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9215 match(Set dst (RShiftL dst cnt)); 9216 effect(KILL cr); 9217 ins_cost(300); 9218 format %{ "MOV $dst.lo,$dst.hi\n" 9219 "\tSAR $dst.lo,$cnt-32\n" 9220 "\tSAR $dst.hi,31" %} 9221 opcode(0xC1, 0x7); /* C1 /7 ib */ 9222 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9223 ins_pipe( ialu_reg_long ); 9224 %} 9225 9226 // Shift Right arithmetic Long by variable 9227 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9228 match(Set dst (RShiftL dst shift)); 9229 effect(KILL cr); 9230 ins_cost(600); 9231 size(18); 9232 format %{ "TEST $shift,32\n\t" 9233 "JEQ,s small\n\t" 9234 "MOV $dst.lo,$dst.hi\n\t" 9235 "SAR $dst.hi,31\n" 9236 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9237 "SAR $dst.hi,$shift" %} 9238 ins_encode( shift_right_arith_long( dst, shift ) ); 9239 ins_pipe( pipe_slow ); 9240 %} 9241 9242 9243 //----------Double Instructions------------------------------------------------ 9244 // Double Math 9245 9246 // Compare & branch 9247 9248 // P6 version of float compare, sets condition codes in EFLAGS 9249 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9250 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9251 match(Set cr (CmpD src1 src2)); 9252 effect(KILL rax); 9253 ins_cost(150); 9254 format %{ "FLD $src1\n\t" 9255 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9256 "JNP exit\n\t" 9257 "MOV ah,1 // saw a NaN, set CF\n\t" 9258 
"SAHF\n" 9259 "exit:\tNOP // avoid branch to branch" %} 9260 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9261 ins_encode( Push_Reg_DPR(src1), 9262 OpcP, RegOpc(src2), 9263 cmpF_P6_fixup ); 9264 ins_pipe( pipe_slow ); 9265 %} 9266 9267 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9268 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9269 match(Set cr (CmpD src1 src2)); 9270 ins_cost(150); 9271 format %{ "FLD $src1\n\t" 9272 "FUCOMIP ST,$src2 // P6 instruction" %} 9273 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9274 ins_encode( Push_Reg_DPR(src1), 9275 OpcP, RegOpc(src2)); 9276 ins_pipe( pipe_slow ); 9277 %} 9278 9279 // Compare & branch 9280 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9281 predicate(UseSSE<=1); 9282 match(Set cr (CmpD src1 src2)); 9283 effect(KILL rax); 9284 ins_cost(200); 9285 format %{ "FLD $src1\n\t" 9286 "FCOMp $src2\n\t" 9287 "FNSTSW AX\n\t" 9288 "TEST AX,0x400\n\t" 9289 "JZ,s flags\n\t" 9290 "MOV AH,1\t# unordered treat as LT\n" 9291 "flags:\tSAHF" %} 9292 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9293 ins_encode( Push_Reg_DPR(src1), 9294 OpcP, RegOpc(src2), 9295 fpu_flags); 9296 ins_pipe( pipe_slow ); 9297 %} 9298 9299 // Compare vs zero into -1,0,1 9300 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9301 predicate(UseSSE<=1); 9302 match(Set dst (CmpD3 src1 zero)); 9303 effect(KILL cr, KILL rax); 9304 ins_cost(280); 9305 format %{ "FTSTD $dst,$src1" %} 9306 opcode(0xE4, 0xD9); 9307 ins_encode( Push_Reg_DPR(src1), 9308 OpcS, OpcP, PopFPU, 9309 CmpF_Result(dst)); 9310 ins_pipe( pipe_slow ); 9311 %} 9312 9313 // Compare into -1,0,1 9314 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9315 predicate(UseSSE<=1); 9316 match(Set dst (CmpD3 src1 src2)); 9317 effect(KILL cr, KILL rax); 9318 ins_cost(300); 9319 format %{ "FCMPD $dst,$src1,$src2" %} 9320 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9321 ins_encode( 
Push_Reg_DPR(src1), 9322 OpcP, RegOpc(src2), 9323 CmpF_Result(dst)); 9324 ins_pipe( pipe_slow ); 9325 %} 9326 9327 // float compare and set condition codes in EFLAGS by XMM regs 9328 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9329 predicate(UseSSE>=2); 9330 match(Set cr (CmpD src1 src2)); 9331 ins_cost(145); 9332 format %{ "UCOMISD $src1,$src2\n\t" 9333 "JNP,s exit\n\t" 9334 "PUSHF\t# saw NaN, set CF\n\t" 9335 "AND [rsp], #0xffffff2b\n\t" 9336 "POPF\n" 9337 "exit:" %} 9338 ins_encode %{ 9339 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9340 emit_cmpfp_fixup(_masm); 9341 %} 9342 ins_pipe( pipe_slow ); 9343 %} 9344 9345 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9346 predicate(UseSSE>=2); 9347 match(Set cr (CmpD src1 src2)); 9348 ins_cost(100); 9349 format %{ "UCOMISD $src1,$src2" %} 9350 ins_encode %{ 9351 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9352 %} 9353 ins_pipe( pipe_slow ); 9354 %} 9355 9356 // float compare and set condition codes in EFLAGS by XMM regs 9357 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9358 predicate(UseSSE>=2); 9359 match(Set cr (CmpD src1 (LoadD src2))); 9360 ins_cost(145); 9361 format %{ "UCOMISD $src1,$src2\n\t" 9362 "JNP,s exit\n\t" 9363 "PUSHF\t# saw NaN, set CF\n\t" 9364 "AND [rsp], #0xffffff2b\n\t" 9365 "POPF\n" 9366 "exit:" %} 9367 ins_encode %{ 9368 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9369 emit_cmpfp_fixup(_masm); 9370 %} 9371 ins_pipe( pipe_slow ); 9372 %} 9373 9374 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9375 predicate(UseSSE>=2); 9376 match(Set cr (CmpD src1 (LoadD src2))); 9377 ins_cost(100); 9378 format %{ "UCOMISD $src1,$src2" %} 9379 ins_encode %{ 9380 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9381 %} 9382 ins_pipe( pipe_slow ); 9383 %} 9384 9385 // Compare into -1,0,1 in XMM 9386 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9387 predicate(UseSSE>=2); 9388 match(Set dst (CmpD3 src1 src2)); 
9389 effect(KILL cr); 9390 ins_cost(255); 9391 format %{ "UCOMISD $src1, $src2\n\t" 9392 "MOV $dst, #-1\n\t" 9393 "JP,s done\n\t" 9394 "JB,s done\n\t" 9395 "SETNE $dst\n\t" 9396 "MOVZB $dst, $dst\n" 9397 "done:" %} 9398 ins_encode %{ 9399 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9400 emit_cmpfp3(_masm, $dst$$Register); 9401 %} 9402 ins_pipe( pipe_slow ); 9403 %} 9404 9405 // Compare into -1,0,1 in XMM and memory 9406 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9407 predicate(UseSSE>=2); 9408 match(Set dst (CmpD3 src1 (LoadD src2))); 9409 effect(KILL cr); 9410 ins_cost(275); 9411 format %{ "UCOMISD $src1, $src2\n\t" 9412 "MOV $dst, #-1\n\t" 9413 "JP,s done\n\t" 9414 "JB,s done\n\t" 9415 "SETNE $dst\n\t" 9416 "MOVZB $dst, $dst\n" 9417 "done:" %} 9418 ins_encode %{ 9419 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9420 emit_cmpfp3(_masm, $dst$$Register); 9421 %} 9422 ins_pipe( pipe_slow ); 9423 %} 9424 9425 9426 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9427 predicate (UseSSE <=1); 9428 match(Set dst (SubD dst src)); 9429 9430 format %{ "FLD $src\n\t" 9431 "DSUBp $dst,ST" %} 9432 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9433 ins_cost(150); 9434 ins_encode( Push_Reg_DPR(src), 9435 OpcP, RegOpc(dst) ); 9436 ins_pipe( fpu_reg_reg ); 9437 %} 9438 9439 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9440 predicate (UseSSE <=1); 9441 match(Set dst (RoundDouble (SubD src1 src2))); 9442 ins_cost(250); 9443 9444 format %{ "FLD $src2\n\t" 9445 "DSUB ST,$src1\n\t" 9446 "FSTP_D $dst\t# D-round" %} 9447 opcode(0xD8, 0x5); 9448 ins_encode( Push_Reg_DPR(src2), 9449 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9450 ins_pipe( fpu_mem_reg_reg ); 9451 %} 9452 9453 9454 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9455 predicate (UseSSE <=1); 9456 match(Set dst (SubD dst (LoadD src))); 9457 ins_cost(150); 9458 9459 format %{ "FLD $src\n\t" 9460 "DSUBp $dst,ST" %} 9461 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9462 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9463 OpcP, RegOpc(dst) ); 9464 ins_pipe( fpu_reg_mem ); 9465 %} 9466 9467 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9468 predicate (UseSSE<=1); 9469 match(Set dst (AbsD src)); 9470 ins_cost(100); 9471 format %{ "FABS" %} 9472 opcode(0xE1, 0xD9); 9473 ins_encode( OpcS, OpcP ); 9474 ins_pipe( fpu_reg_reg ); 9475 %} 9476 9477 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9478 predicate(UseSSE<=1); 9479 match(Set dst (NegD src)); 9480 ins_cost(100); 9481 format %{ "FCHS" %} 9482 opcode(0xE0, 0xD9); 9483 ins_encode( OpcS, OpcP ); 9484 ins_pipe( fpu_reg_reg ); 9485 %} 9486 9487 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9488 predicate(UseSSE<=1); 9489 match(Set dst (AddD dst src)); 9490 format %{ "FLD $src\n\t" 9491 "DADD $dst,ST" %} 9492 size(4); 9493 ins_cost(150); 9494 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9495 ins_encode( Push_Reg_DPR(src), 9496 OpcP, RegOpc(dst) ); 9497 ins_pipe( fpu_reg_reg ); 9498 %} 9499 9500 9501 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9502 predicate(UseSSE<=1); 9503 match(Set dst (RoundDouble (AddD src1 src2))); 9504 ins_cost(250); 9505 9506 format %{ "FLD $src2\n\t" 9507 "DADD ST,$src1\n\t" 9508 "FSTP_D $dst\t# D-round" %} 9509 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9510 ins_encode( Push_Reg_DPR(src2), 9511 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9512 ins_pipe( fpu_mem_reg_reg ); 9513 %} 9514 9515 9516 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9517 predicate(UseSSE<=1); 9518 match(Set dst (AddD dst (LoadD src))); 9519 ins_cost(150); 9520 9521 format %{ "FLD $src\n\t" 9522 "DADDp $dst,ST" %} 9523 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9524 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9525 OpcP, RegOpc(dst) ); 9526 ins_pipe( fpu_reg_mem ); 9527 %} 9528 9529 // add-to-memory 9530 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9531 predicate(UseSSE<=1); 9532 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9533 ins_cost(150); 9534 9535 format %{ "FLD_D $dst\n\t" 9536 "DADD ST,$src\n\t" 9537 "FST_D $dst" %} 9538 opcode(0xDD, 0x0); 9539 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9540 Opcode(0xD8), RegOpc(src), 9541 set_instruction_start, 9542 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9543 ins_pipe( fpu_reg_mem ); 9544 %} 9545 9546 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9547 predicate(UseSSE<=1); 9548 match(Set dst (AddD dst con)); 9549 ins_cost(125); 9550 format %{ "FLD1\n\t" 9551 "DADDp $dst,ST" %} 9552 ins_encode %{ 9553 __ fld1(); 9554 __ faddp($dst$$reg); 9555 %} 9556 ins_pipe(fpu_reg); 9557 %} 9558 9559 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9560 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9561 match(Set dst (AddD dst con)); 9562 ins_cost(200); 9563 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9564 "DADDp $dst,ST" %} 9565 ins_encode %{ 9566 __ fld_d($constantaddress($con)); 9567 __ faddp($dst$$reg); 9568 %} 9569 ins_pipe(fpu_reg_mem); 9570 %} 9571 9572 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9573 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9574 match(Set dst (RoundDouble (AddD src con))); 9575 ins_cost(200); 9576 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9577 "DADD ST,$src\n\t" 9578 "FSTP_D $dst\t# D-round" %} 9579 ins_encode %{ 9580 __ fld_d($constantaddress($con)); 9581 __ fadd($src$$reg); 9582 __ fstp_d(Address(rsp, $dst$$disp)); 9583 %} 9584 ins_pipe(fpu_mem_reg_con); 9585 %} 9586 9587 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9588 predicate(UseSSE<=1); 9589 match(Set dst (MulD dst src)); 9590 format %{ "FLD $src\n\t" 9591 "DMULp $dst,ST" %} 9592 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9593 ins_cost(150); 9594 ins_encode( Push_Reg_DPR(src), 9595 OpcP, RegOpc(dst) ); 9596 ins_pipe( 
fpu_reg_reg ); 9597 %} 9598 9599 // Strict FP instruction biases argument before multiply then 9600 // biases result to avoid double rounding of subnormals. 9601 // 9602 // scale arg1 by multiplying arg1 by 2^(-15360) 9603 // load arg2 9604 // multiply scaled arg1 by arg2 9605 // rescale product by 2^(15360) 9606 // 9607 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9608 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9609 match(Set dst (MulD dst src)); 9610 ins_cost(1); // Select this instruction for all strict FP double multiplies 9611 9612 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9613 "DMULp $dst,ST\n\t" 9614 "FLD $src\n\t" 9615 "DMULp $dst,ST\n\t" 9616 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9617 "DMULp $dst,ST\n\t" %} 9618 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9619 ins_encode( strictfp_bias1(dst), 9620 Push_Reg_DPR(src), 9621 OpcP, RegOpc(dst), 9622 strictfp_bias2(dst) ); 9623 ins_pipe( fpu_reg_reg ); 9624 %} 9625 9626 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9627 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9628 match(Set dst (MulD dst con)); 9629 ins_cost(200); 9630 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9631 "DMULp $dst,ST" %} 9632 ins_encode %{ 9633 __ fld_d($constantaddress($con)); 9634 __ fmulp($dst$$reg); 9635 %} 9636 ins_pipe(fpu_reg_mem); 9637 %} 9638 9639 9640 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9641 predicate( UseSSE<=1 ); 9642 match(Set dst (MulD dst (LoadD src))); 9643 ins_cost(200); 9644 format %{ "FLD_D $src\n\t" 9645 "DMULp $dst,ST" %} 9646 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9647 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9648 OpcP, RegOpc(dst) ); 9649 ins_pipe( fpu_reg_mem ); 9650 %} 9651 9652 // 9653 // Cisc-alternate to reg-reg multiply 9654 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9655 predicate( UseSSE<=1 ); 9656 match(Set dst (MulD src (LoadD mem))); 9657 ins_cost(250); 9658 format %{ "FLD_D $mem\n\t" 9659 "DMUL ST,$src\n\t" 9660 "FSTP_D $dst" %} 9661 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9662 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9663 OpcReg_FPR(src), 9664 Pop_Reg_DPR(dst) ); 9665 ins_pipe( fpu_reg_reg_mem ); 9666 %} 9667 9668 9669 // MACRO3 -- addDPR a mulDPR 9670 // This instruction is a '2-address' instruction in that the result goes 9671 // back to src2. This eliminates a move from the macro; possibly the 9672 // register allocator will have to add it back (and maybe not). 9673 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9674 predicate( UseSSE<=1 ); 9675 match(Set src2 (AddD (MulD src0 src1) src2)); 9676 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9677 "DMUL ST,$src1\n\t" 9678 "DADDp $src2,ST" %} 9679 ins_cost(250); 9680 opcode(0xDD); /* LoadD DD /0 */ 9681 ins_encode( Push_Reg_FPR(src0), 9682 FMul_ST_reg(src1), 9683 FAddP_reg_ST(src2) ); 9684 ins_pipe( fpu_reg_reg_reg ); 9685 %} 9686 9687 9688 // MACRO3 -- subDPR a mulDPR 9689 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9690 predicate( UseSSE<=1 ); 9691 match(Set src2 (SubD (MulD src0 src1) src2)); 9692 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9693 "DMUL ST,$src1\n\t" 9694 "DSUBRp $src2,ST" %} 9695 ins_cost(250); 9696 ins_encode( Push_Reg_FPR(src0), 9697 FMul_ST_reg(src1), 9698 Opcode(0xDE), Opc_plus(0xE0,src2)); 9699 ins_pipe( fpu_reg_reg_reg ); 9700 %} 9701 9702 9703 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9704 predicate( UseSSE<=1 ); 9705 match(Set dst (DivD dst src)); 9706 9707 format %{ "FLD $src\n\t" 9708 "FDIVp $dst,ST" %} 9709 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9710 ins_cost(150); 9711 ins_encode( Push_Reg_DPR(src), 9712 OpcP, RegOpc(dst) ); 9713 ins_pipe( fpu_reg_reg ); 9714 %} 9715 9716 // Strict FP instruction biases argument before division then 9717 // biases 
// result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Single predicate: x87 path only, and only for methods compiled strictfp.
  // (A second, weaker predicate here would let this biased form match
  // non-strict divides — ADLC allows exactly one predicate per instruct.)
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all strict FP double divides

  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "FDIVp  $dst,ST\n\t"
            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7 */
  ins_encode( strictfp_bias1(dst),   // pre-scale dividend by 2^(-15360)
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),     // FDIVp
              strictfp_bias2(dst) ); // rescale quotient by 2^(15360)
  ins_pipe( fpu_reg_reg );
%}

// Round result of a non-strict x87 double divide to a stack slot.
instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD    $src1\n\t"
            "FDIV   ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}

// Double remainder on the x87 stack (FPREM loop inside emitModDPR()).
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Double remainder for the SSE2 path: operands bounce through the FPU.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
9775 effect(KILL rax, KILL cr); 9776 9777 format %{ "SUB ESP,8\t # DMOD\n" 9778 "\tMOVSD [ESP+0],$src1\n" 9779 "\tFLD_D [ESP+0]\n" 9780 "\tMOVSD [ESP+0],$src0\n" 9781 "\tFLD_D [ESP+0]\n" 9782 "loop:\tFPREM\n" 9783 "\tFWAIT\n" 9784 "\tFNSTSW AX\n" 9785 "\tSAHF\n" 9786 "\tJP loop\n" 9787 "\tFSTP_D [ESP+0]\n" 9788 "\tMOVSD $dst,[ESP+0]\n" 9789 "\tADD ESP,8\n" 9790 "\tFSTP ST0\t # Restore FPU Stack" 9791 %} 9792 ins_cost(250); 9793 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9794 ins_pipe( pipe_slow ); 9795 %} 9796 9797 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ 9798 predicate (UseSSE<=1); 9799 match(Set dst(TanD src)); 9800 format %{ "DTAN $dst" %} 9801 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan 9802 Opcode(0xDD), Opcode(0xD8)); // fstp st 9803 ins_pipe( pipe_slow ); 9804 %} 9805 9806 instruct tanD_reg(regD dst, eFlagsReg cr) %{ 9807 predicate (UseSSE>=2); 9808 match(Set dst(TanD dst)); 9809 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9810 format %{ "DTAN $dst" %} 9811 ins_encode( Push_SrcD(dst), 9812 Opcode(0xD9), Opcode(0xF2), // fptan 9813 Opcode(0xDD), Opcode(0xD8), // fstp st 9814 Push_ResultD(dst) ); 9815 ins_pipe( pipe_slow ); 9816 %} 9817 9818 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9819 predicate (UseSSE<=1); 9820 match(Set dst(AtanD dst src)); 9821 format %{ "DATA $dst,$src" %} 9822 opcode(0xD9, 0xF3); 9823 ins_encode( Push_Reg_DPR(src), 9824 OpcP, OpcS, RegOpc(dst) ); 9825 ins_pipe( pipe_slow ); 9826 %} 9827 9828 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9829 predicate (UseSSE>=2); 9830 match(Set dst(AtanD dst src)); 9831 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9832 format %{ "DATA $dst,$src" %} 9833 opcode(0xD9, 0xF3); 9834 ins_encode( Push_SrcD(src), 9835 OpcP, OpcS, Push_ResultD(dst) ); 9836 ins_pipe( pipe_slow ); 9837 %} 9838 9839 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9840 predicate (UseSSE<=1); 9841 match(Set dst (SqrtD 
src)); 9842 format %{ "DSQRT $dst,$src" %} 9843 opcode(0xFA, 0xD9); 9844 ins_encode( Push_Reg_DPR(src), 9845 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9846 ins_pipe( pipe_slow ); 9847 %} 9848 9849 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ 9850 predicate (UseSSE<=1); 9851 // The source Double operand on FPU stack 9852 match(Set dst (Log10D src)); 9853 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9854 // fxch ; swap ST(0) with ST(1) 9855 // fyl2x ; compute log_10(2) * log_2(x) 9856 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9857 "FXCH \n\t" 9858 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9859 %} 9860 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9861 Opcode(0xD9), Opcode(0xC9), // fxch 9862 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9863 9864 ins_pipe( pipe_slow ); 9865 %} 9866 9867 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ 9868 predicate (UseSSE>=2); 9869 effect(KILL cr); 9870 match(Set dst (Log10D src)); 9871 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9872 // fyl2x ; compute log_10(2) * log_2(x) 9873 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9874 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9875 %} 9876 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9877 Push_SrcD(src), 9878 Opcode(0xD9), Opcode(0xF1), // fyl2x 9879 Push_ResultD(dst)); 9880 9881 ins_pipe( pipe_slow ); 9882 %} 9883 9884 //-------------Float Instructions------------------------------- 9885 // Float Math 9886 9887 // Code for float compare: 9888 // fcompp(); 9889 // fwait(); fnstsw_ax(); 9890 // sahf(); 9891 // movl(dst, unordered_result); 9892 // jcc(Assembler::parity, exit); 9893 // movl(dst, less_result); 9894 // jcc(Assembler::below, exit); 9895 // movl(dst, equal_result); 9896 // jcc(Assembler::equal, exit); 9897 // movl(dst, greater_result); 9898 // exit: 9899 9900 // P6 version of float compare, sets condition codes in EFLAGS 9901 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9902 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9903 
match(Set cr (CmpF src1 src2)); 9904 effect(KILL rax); 9905 ins_cost(150); 9906 format %{ "FLD $src1\n\t" 9907 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9908 "JNP exit\n\t" 9909 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 9910 "SAHF\n" 9911 "exit:\tNOP // avoid branch to branch" %} 9912 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9913 ins_encode( Push_Reg_DPR(src1), 9914 OpcP, RegOpc(src2), 9915 cmpF_P6_fixup ); 9916 ins_pipe( pipe_slow ); 9917 %} 9918 9919 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 9920 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9921 match(Set cr (CmpF src1 src2)); 9922 ins_cost(100); 9923 format %{ "FLD $src1\n\t" 9924 "FUCOMIP ST,$src2 // P6 instruction" %} 9925 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9926 ins_encode( Push_Reg_DPR(src1), 9927 OpcP, RegOpc(src2)); 9928 ins_pipe( pipe_slow ); 9929 %} 9930 9931 9932 // Compare & branch 9933 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9934 predicate(UseSSE == 0); 9935 match(Set cr (CmpF src1 src2)); 9936 effect(KILL rax); 9937 ins_cost(200); 9938 format %{ "FLD $src1\n\t" 9939 "FCOMp $src2\n\t" 9940 "FNSTSW AX\n\t" 9941 "TEST AX,0x400\n\t" 9942 "JZ,s flags\n\t" 9943 "MOV AH,1\t# unordered treat as LT\n" 9944 "flags:\tSAHF" %} 9945 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9946 ins_encode( Push_Reg_DPR(src1), 9947 OpcP, RegOpc(src2), 9948 fpu_flags); 9949 ins_pipe( pipe_slow ); 9950 %} 9951 9952 // Compare vs zero into -1,0,1 9953 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9954 predicate(UseSSE == 0); 9955 match(Set dst (CmpF3 src1 zero)); 9956 effect(KILL cr, KILL rax); 9957 ins_cost(280); 9958 format %{ "FTSTF $dst,$src1" %} 9959 opcode(0xE4, 0xD9); 9960 ins_encode( Push_Reg_DPR(src1), 9961 OpcS, OpcP, PopFPU, 9962 CmpF_Result(dst)); 9963 ins_pipe( pipe_slow ); 9964 %} 9965 9966 // Compare into -1,0,1 9967 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, 
eFlagsReg cr) %{ 9968 predicate(UseSSE == 0); 9969 match(Set dst (CmpF3 src1 src2)); 9970 effect(KILL cr, KILL rax); 9971 ins_cost(300); 9972 format %{ "FCMPF $dst,$src1,$src2" %} 9973 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9974 ins_encode( Push_Reg_DPR(src1), 9975 OpcP, RegOpc(src2), 9976 CmpF_Result(dst)); 9977 ins_pipe( pipe_slow ); 9978 %} 9979 9980 // float compare and set condition codes in EFLAGS by XMM regs 9981 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 9982 predicate(UseSSE>=1); 9983 match(Set cr (CmpF src1 src2)); 9984 ins_cost(145); 9985 format %{ "UCOMISS $src1,$src2\n\t" 9986 "JNP,s exit\n\t" 9987 "PUSHF\t# saw NaN, set CF\n\t" 9988 "AND [rsp], #0xffffff2b\n\t" 9989 "POPF\n" 9990 "exit:" %} 9991 ins_encode %{ 9992 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 9993 emit_cmpfp_fixup(_masm); 9994 %} 9995 ins_pipe( pipe_slow ); 9996 %} 9997 9998 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 9999 predicate(UseSSE>=1); 10000 match(Set cr (CmpF src1 src2)); 10001 ins_cost(100); 10002 format %{ "UCOMISS $src1,$src2" %} 10003 ins_encode %{ 10004 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10005 %} 10006 ins_pipe( pipe_slow ); 10007 %} 10008 10009 // float compare and set condition codes in EFLAGS by XMM regs 10010 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10011 predicate(UseSSE>=1); 10012 match(Set cr (CmpF src1 (LoadF src2))); 10013 ins_cost(165); 10014 format %{ "UCOMISS $src1,$src2\n\t" 10015 "JNP,s exit\n\t" 10016 "PUSHF\t# saw NaN, set CF\n\t" 10017 "AND [rsp], #0xffffff2b\n\t" 10018 "POPF\n" 10019 "exit:" %} 10020 ins_encode %{ 10021 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10022 emit_cmpfp_fixup(_masm); 10023 %} 10024 ins_pipe( pipe_slow ); 10025 %} 10026 10027 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10028 predicate(UseSSE>=1); 10029 match(Set cr (CmpF src1 (LoadF src2))); 10030 ins_cost(100); 10031 format %{ "UCOMISS $src1,$src2" %} 10032 ins_encode %{ 
10033 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10034 %} 10035 ins_pipe( pipe_slow ); 10036 %} 10037 10038 // Compare into -1,0,1 in XMM 10039 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10040 predicate(UseSSE>=1); 10041 match(Set dst (CmpF3 src1 src2)); 10042 effect(KILL cr); 10043 ins_cost(255); 10044 format %{ "UCOMISS $src1, $src2\n\t" 10045 "MOV $dst, #-1\n\t" 10046 "JP,s done\n\t" 10047 "JB,s done\n\t" 10048 "SETNE $dst\n\t" 10049 "MOVZB $dst, $dst\n" 10050 "done:" %} 10051 ins_encode %{ 10052 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10053 emit_cmpfp3(_masm, $dst$$Register); 10054 %} 10055 ins_pipe( pipe_slow ); 10056 %} 10057 10058 // Compare into -1,0,1 in XMM and memory 10059 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10060 predicate(UseSSE>=1); 10061 match(Set dst (CmpF3 src1 (LoadF src2))); 10062 effect(KILL cr); 10063 ins_cost(275); 10064 format %{ "UCOMISS $src1, $src2\n\t" 10065 "MOV $dst, #-1\n\t" 10066 "JP,s done\n\t" 10067 "JB,s done\n\t" 10068 "SETNE $dst\n\t" 10069 "MOVZB $dst, $dst\n" 10070 "done:" %} 10071 ins_encode %{ 10072 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10073 emit_cmpfp3(_masm, $dst$$Register); 10074 %} 10075 ins_pipe( pipe_slow ); 10076 %} 10077 10078 // Spill to obtain 24-bit precision 10079 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10080 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10081 match(Set dst (SubF src1 src2)); 10082 10083 format %{ "FSUB $dst,$src1 - $src2" %} 10084 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10085 ins_encode( Push_Reg_FPR(src1), 10086 OpcReg_FPR(src2), 10087 Pop_Mem_FPR(dst) ); 10088 ins_pipe( fpu_mem_reg_reg ); 10089 %} 10090 // 10091 // This instruction does not round to 24-bits 10092 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10093 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10094 match(Set dst (SubF dst src)); 10095 10096 
  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// The patterns below are the x87 (UseSSE==0) float add/mul forms.
// The *FPR24* variants force rounding to 24-bit (single) precision by
// popping the x87 result into a 32-bit stack slot (stackSlotF dst via
// Pop_Mem_FPR / FSTP_S); the non-24 variants pop into an FPU register
// and therefore keep the wider x87 intermediate precision.

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Absolute value: FABS operates on the x87 top-of-stack, hence the
// regFPR1 (ST(0)) operands; emitted as the two bytes D9 E1.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Negate: FCHS flips the sign of ST(0); emitted as the two bytes D9 E0.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// NOTE(review): format says "FSTP_S" but this variant pops into an FPU
// register (Pop_Reg_FPR) — display text only; presumably should read "FSTP".
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Float remainder (ModF). All three variants funnel through the shared
// emitModDPR() helper, which loops on FPREM until the reduction is
// complete — hence KILL rax/cr (FNSTSW AX / SAHF) and pipe_slow.

// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE variant: operands live in XMM registers but FPREM only exists on
// the x87 unit, so the values are bounced through the stack and back.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!

instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is not already ST(0), load it to the top of the x87 stack
    // first; FST(P)_S does the actual narrowing to 32-bit.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): format reads "FST_S" although this pattern widens
// float->double via Pop_Reg_Reg_DPR — display text only; verify.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSD2SI yields 0x80000000 ("integer indefinite") on overflow/NaN;
    // that sentinel routes the value to the d2i_wrapper stub for the
    // Java-specified corner-case handling.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    // Switch the FPU to truncating rounding for the FIST, as required by
    // Java narrowing-conversion semantics.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x80000000:00000000 is the "integer indefinite" sentinel meaning
    // overflow or NaN; fall into the d2l_wrapper slow path in that case.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSS2SI yields the 0x80000000 sentinel on overflow/NaN; the slow
    // path re-loads the float on the x87 stack and calls the d2i_wrapper
    // stub (shared with the double case) for Java corner-case semantics.
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // Bounce the XMM float through memory onto the x87 stack, FIST it in
    // truncating mode, then check for the 0x80000000:00000000 sentinel
    // (overflow/NaN) that routes to the d2l_wrapper slow path.
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Alternative i2d when UseXmmI2D: move the int into an XMM register and
// convert with CVTDQ2PD instead of CVTSI2SD.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// The predicate matches ConvI2F whose input is (AndI x 255), i.e. an
// unsigned-byte value — always exactly representable in a float, so no
// 24-bit rounding spill is needed.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Alternative i2f when UseXmmI2F: MOVD + CVTDQ2PS instead of CVTSI2SS.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy low word then arithmetic-shift the high
// copy right by 31 to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long -> double on the x87: push both halves, FILD the 64-bit integer,
// and round out through a stack slot.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long -> int: just take the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Raw bit moves between int/long registers, FPU/XMM registers, and stack
// slots. These reinterpret bits; no conversion or rounding is performed.

instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    // PSHUFLW with imm 0x4E reorders the low four 16-bit words so the
    // upper 32 bits of the double land in the low dword of tmp, from
    // where MOVD can extract them into dst.hi.
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 &&
UseXmmLoadAndClearUpper); 11300 match(Set dst (MoveL2D src)); 11301 effect(DEF dst, USE src); 11302 11303 ins_cost(95); 11304 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11305 ins_encode %{ 11306 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11307 %} 11308 ins_pipe( pipe_slow ); 11309 %} 11310 11311 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11312 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11313 match(Set dst (MoveL2D src)); 11314 effect(DEF dst, USE src); 11315 11316 ins_cost(95); 11317 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11318 ins_encode %{ 11319 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11320 %} 11321 ins_pipe( pipe_slow ); 11322 %} 11323 11324 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11325 predicate(UseSSE>=2); 11326 match(Set dst (MoveL2D src)); 11327 effect(TEMP dst, USE src, TEMP tmp); 11328 ins_cost(85); 11329 format %{ "MOVD $dst,$src.lo\n\t" 11330 "MOVD $tmp,$src.hi\n\t" 11331 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11332 ins_encode %{ 11333 __ movdl($dst$$XMMRegister, $src$$Register); 11334 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11335 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11336 %} 11337 ins_pipe( pipe_slow ); 11338 %} 11339 11340 11341 // ======================================================================= 11342 // fast clearing of an array 11343 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11344 predicate(!UseFastStosb); 11345 match(Set dummy (ClearArray cnt base)); 11346 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11347 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11348 "SHL ECX,1\t# Convert doublewords to words\n\t" 11349 "REP STOS\t# store EAX into [EDI++] while ECX--" %} 11350 ins_encode %{ 11351 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11352 %} 11353 ins_pipe( pipe_slow ); 11354 %} 11355 11356 instruct 
rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11357 predicate(UseFastStosb); 11358 match(Set dummy (ClearArray cnt base)); 11359 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11360 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11361 "SHL ECX,3\t# Convert doublewords to bytes\n\t" 11362 "REP STOSB\t# store EAX into [EDI++] while ECX--" %} 11363 ins_encode %{ 11364 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11365 %} 11366 ins_pipe( pipe_slow ); 11367 %} 11368 11369 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11370 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11371 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11372 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11373 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11374 11375 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11376 ins_encode %{ 11377 __ string_compare($str1$$Register, $str2$$Register, 11378 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11379 $tmp1$$XMMRegister, StrIntrinsicNode::LL); 11380 %} 11381 ins_pipe( pipe_slow ); 11382 %} 11383 11384 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11385 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11386 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11387 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11388 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11389 11390 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11391 ins_encode %{ 11392 __ string_compare($str1$$Register, $str2$$Register, 11393 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11394 $tmp1$$XMMRegister, StrIntrinsicNode::UU); 11395 %} 11396 ins_pipe( pipe_slow ); 11397 %} 11398 11399 instruct string_compareLU(eDIRegP 
str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11400 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11401 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11402 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11403 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11404 11405 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11406 ins_encode %{ 11407 __ string_compare($str1$$Register, $str2$$Register, 11408 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11409 $tmp1$$XMMRegister, StrIntrinsicNode::LU); 11410 %} 11411 ins_pipe( pipe_slow ); 11412 %} 11413 11414 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11415 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11416 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); 11417 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11418 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11419 11420 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11421 ins_encode %{ 11422 __ string_compare($str2$$Register, $str1$$Register, 11423 $cnt2$$Register, $cnt1$$Register, $result$$Register, 11424 $tmp1$$XMMRegister, StrIntrinsicNode::UL); 11425 %} 11426 ins_pipe( pipe_slow ); 11427 %} 11428 11429 // fast string equals 11430 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11431 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11432 match(Set result (StrEquals (Binary str1 str2) cnt)); 11433 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11434 11435 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11436 ins_encode %{ 11437 __ arrays_equals(false, $str1$$Register, $str2$$Register, 11438 $cnt$$Register, $result$$Register, $tmp3$$Register, 11439 $tmp1$$XMMRegister, 
$tmp2$$XMMRegister, false /* char */); 11440 %} 11441 11442 ins_pipe( pipe_slow ); 11443 %} 11444 11445 // fast search of substring with known size. 11446 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11447 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11448 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11449 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11450 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11451 11452 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11453 ins_encode %{ 11454 int icnt2 = (int)$int_cnt2$$constant; 11455 if (icnt2 >= 16) { 11456 // IndexOf for constant substrings with size >= 16 elements 11457 // which don't need to be loaded through stack. 11458 __ string_indexofC8($str1$$Register, $str2$$Register, 11459 $cnt1$$Register, $cnt2$$Register, 11460 icnt2, $result$$Register, 11461 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11462 } else { 11463 // Small strings are loaded through stack if they cross page boundary. 11464 __ string_indexof($str1$$Register, $str2$$Register, 11465 $cnt1$$Register, $cnt2$$Register, 11466 icnt2, $result$$Register, 11467 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11468 } 11469 %} 11470 ins_pipe( pipe_slow ); 11471 %} 11472 11473 // fast search of substring with known size. 
// UU variant: both strings are char[] (UTF-16); substring length is a constant.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// UL variant: char[] haystack, byte[] needle; substring length is a constant.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length substring search; (-1) tells the macro the needle
// length is not a compile-time constant.  byte[]/byte[] (LL).
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length substring search, char[]/char[] (UU).
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length substring search, char[] haystack / byte[] needle (UL).
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Search a char[] for a single char value.
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}

// Test a byte[] for any element with the sign bit set.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero using TEST reg,reg (shorter than CMP with imm 0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fuse (AndI src con) == 0 into a single TEST reg,imm.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fuse (AndI src mem) == 0 into a single TEST reg,mem.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare against zero via TEST reg,reg.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
11918 // // Conditional move for max 11919 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11920 // effect( USE_DEF op2, USE op1, USE cr ); 11921 // format %{ "CMOVgt $op2,$op1\t! max" %} 11922 // opcode(0x4F,0x0F); 11923 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11924 // ins_pipe( pipe_cmov_reg ); 11925 //%} 11926 // 11927 // // Max Register with Register (P6 version) 11928 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11929 // predicate(VM_Version::supports_cmov() ); 11930 // match(Set op2 (MaxI op1 op2)); 11931 // ins_cost(200); 11932 // expand %{ 11933 // eFlagsReg cr; 11934 // compI_eReg(cr,op1,op2); 11935 // cmovI_reg_gt(op2,op1,cr); 11936 // %} 11937 //%} 11938 11939 // Max Register with Register (generic version) 11940 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11941 match(Set dst (MaxI dst src)); 11942 effect(KILL flags); 11943 ins_cost(300); 11944 11945 format %{ "MAX $dst,$src" %} 11946 opcode(0xCC); 11947 ins_encode( max_enc(dst,src) ); 11948 ins_pipe( pipe_slow ); 11949 %} 11950 11951 // ============================================================================ 11952 // Counted Loop limit node which represents exact final iterator value. 11953 // Note: the resulting value should fit into integer range since 11954 // counted loops have limit check on overflow. 11955 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ 11956 match(Set limit (LoopLimit (Binary init limit) stride)); 11957 effect(TEMP limit_hi, TEMP tmp, KILL flags); 11958 ins_cost(300); 11959 11960 format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} 11961 ins_encode %{ 11962 int strd = (int)$stride$$constant; 11963 assert(strd != 1 && strd != -1, "sanity"); 11964 int m1 = (strd > 0) ? 
1 : -1; 11965 // Convert limit to long (EAX:EDX) 11966 __ cdql(); 11967 // Convert init to long (init:tmp) 11968 __ movl($tmp$$Register, $init$$Register); 11969 __ sarl($tmp$$Register, 31); 11970 // $limit - $init 11971 __ subl($limit$$Register, $init$$Register); 11972 __ sbbl($limit_hi$$Register, $tmp$$Register); 11973 // + ($stride - 1) 11974 if (strd > 0) { 11975 __ addl($limit$$Register, (strd - 1)); 11976 __ adcl($limit_hi$$Register, 0); 11977 __ movl($tmp$$Register, strd); 11978 } else { 11979 __ addl($limit$$Register, (strd + 1)); 11980 __ adcl($limit_hi$$Register, -1); 11981 __ lneg($limit_hi$$Register, $limit$$Register); 11982 __ movl($tmp$$Register, -strd); 11983 } 11984 // signed devision: (EAX:EDX) / pos_stride 11985 __ idivl($tmp$$Register); 11986 if (strd < 0) { 11987 // restore sign 11988 __ negl($tmp$$Register); 11989 } 11990 // (EAX) * stride 11991 __ mull($tmp$$Register); 11992 // + init (ignore upper bits) 11993 __ addl($limit$$Register, $init$$Register); 11994 %} 11995 ins_pipe( pipe_slow ); 11996 %} 11997 11998 // ============================================================================ 11999 // Branch Instructions 12000 // Jump Table 12001 instruct jumpXtnd(rRegI switch_val) %{ 12002 match(Jump switch_val); 12003 ins_cost(350); 12004 format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} 12005 ins_encode %{ 12006 // Jump to Address(table_base + switch_reg) 12007 Address index(noreg, $switch_val$$Register, Address::times_1); 12008 __ jump(ArrayAddress($constantaddress, index)); 12009 %} 12010 ins_pipe(pipe_jmp); 12011 %} 12012 12013 // Jump Direct - Label defines a relative address from JMP+1 12014 instruct jmpDir(label labl) %{ 12015 match(Goto); 12016 effect(USE labl); 12017 12018 ins_cost(300); 12019 format %{ "JMP $labl" %} 12020 size(5); 12021 ins_encode %{ 12022 Label* L = $labl$$label; 12023 __ jmp(*L, false); // Always long jump 12024 %} 12025 ins_pipe( pipe_jmp ); 12026 %} 12027 12028 // Jump Direct Conditional - Label defines a 
// relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Unordered-or-unsignedness compare needs an extra parity check:
// PF is set on an unordered FP compare, which must also take the branch
// (notEqual) or must be skipped around (equal).
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Flags-only variant: callers compare the scan result against NULL, so EDI
// need not be zeroed on a hit -- the flags alone carry the answer.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short variant of jmpConUCF2: both branches are emitted as 2-byte jccb,
// hence the fixed size(4).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // Compare high words signed, then low words unsigned.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): the BoolTest disjunctions below were previously written as
// "UseSSE<=1 && lt || ge" -- '&&' binds tighter than '||', so the UseSSE
// guard did not cover the second arm (latent, since operand classes keep
// the rules apart).  Parenthesized to match the intent and the int/long
// cmov rules above.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): the BoolTest disjunctions below were previously written as
// "UseSSE<=1 && eq || ne" -- '&&' binds tighter than '||', so the UseSSE
// guard did not cover the second arm (latent, since operand classes keep
// the rules apart).  Parenthesized to match the intent and the int/long
// cmov rules above.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): the BoolTest disjunctions below were previously written as
// "UseSSE<=1 && le || gt" -- '&&' binds tighter than '||', so the UseSSE
// guard did not cover the second arm (latent, since operand classes keep
// the rules apart).  Parenthesized to match the intent and the int/long
// cmov rules above.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
// Dynamic (virtual) Java call.  Per the format string, EAX is seeded with
// (oop)-1 before the call -- presumably the inline-cache holder slot; the
// details live in the Java_Dynamic_Call encoding class (not visible here).
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */      // direct near CALL, rel32 displacement
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that touches no FP state: no float-stack flushing needed,
// so the encoding is just the runtime call itself.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);                  // near RET
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */   // indirect JMP through register
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // Pop into EDX first: discards the now-dead return address so the
  // target sees the caller's frame layout.
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);                       // purely a register-binding rule; emits nothing
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast-lock with RTM (restricted transactional memory) support.
// Needs two extra scratch registers (cx1/cx2) beyond the non-RTM form;
// box is USE_KILL because fast_lock clobbers it.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast-lock: the RTM-only arguments are passed as noreg/NULL/false.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

// Fast-unlock; a single rule serves both RTM and non-RTM, with the mode
// passed through to fast_unlock at runtime via use_rtm().
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or. By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$. Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic. It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6);                       // fixed encoding size; must match Safepoint_Poll
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Live rule: a load that immediately follows a store of the same value to
// the same memory location is redundant; replace the pair with just the
// store (instruction 0 = loadI root, 1 = preceding storeI).
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.