//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define   RELOC_IMM32    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
// NOTE(review): the +2 stride matches how adjacent ideal-register halves are
// laid out in the OptoReg numbering for this port -- confirm against adGlobals.
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool entry is rounded down inside the over-allocated buffer so the
// resulting operand is 16-byte aligned as SSE requires.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call by the pre-call reset
// sequence: an optional FLDCW (6 bytes) when the method runs in 24-bit FP
// mode, and an optional vzeroupper (3 bytes) when wide vectors are in use.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    if(UseAVX <= 2) {
      size += 3; // vzeroupper
    }
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All stub call sequence; recorded when the
// stub is first emitted (must happen before ret_addr_offset is queried).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit one ModR/M (or SIB) byte assembled from the 2-bit, 3-bit, 3-bit fields.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a condition code into a base opcode.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
// 'offset' adjusts the relocation address relative to the instruction mark.
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Embedded oops must not be scavengable unless ScavengeRootsInCode allows it.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits [ESP+disp] addressing: ModR/M with ESP base always requires a SIB
// byte; an 8-bit displacement form is used when disp fits in a signed byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModR/M (+ optional SIB + displacement) bytes for a reg,mem form.
// base == -1 flags an absolute 32-bit address; index == 0x4 means "no index".
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Register-to-register integer move (MOV r32, r/m32 == 0x8B); a move to the
// same register is elided entirely.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Post-compare fixup so that an unordered (NaN) comparison result reads as
// 'less than': skipped entirely when PF is clear (ordered operands).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  // 7 6 5 4 3 2 1 0
  // |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  // 0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst': -1 for less/unordered,
// 0 for equal, 1 for greater (setb on the not-equal condition).
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
// Pretty-print the method prolog; must mirror the code MachPrologNode::emit
// produces (via MacroAssembler::verified_entry).
#ifndef PRODUCT
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
// Pretty-print the method epilog; must mirror MachEpilogNode::emit below.
#ifndef PRODUCT
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    MacroAssembler masm(&cbuf);
    masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  // ADD ESP,#framesize -- 32-bit immediate form (0x81) when the frame does
  // not fit a signed byte, otherwise the short 8-bit immediate form (0x83).
  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (do_polling() && C->is_method_compilation()) {
    // TEST of the absolute polling page address; faults at a safepoint.
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

// Byte-size accounting must stay in exact lockstep with the emit() above.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Register class of an OptoReg, used to pick the right spill-copy form.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (or format, or size) a single load/store between a register and an
// [ESP+offset] stack slot.  Exactly one mode runs per call: cbuf != NULL
// emits code, else !do_size prints assembly; the return value is always the
// accumulated byte size (opcode + ModR/M + SIB + displacement).
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else {                      // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Spill/reload between an XMM register and a stack slot.  Emits code when
// 'cbuf' is non-NULL, prints assembly when formatting, and always returns
// the accumulated instruction size.  'reg_lo'/'reg_hi' adjacent means a
// 64-bit (double) move, otherwise a 32-bit (float) move.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  // Operand width controls EVEX displacement compression (see below).
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // With AVX-512 the displacement may be compressed to a single byte even
  // when it exceeds 127 (disp8*N scaling); query the assembler.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (float or double, depending on hi/lo adjacency).
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) { // Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// 32-bit GPR -> XMM copy via MOVD.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// 32-bit XMM -> GPR copy via MOVD.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// GPR -> GPR copy (MOV r32,r/m32: opcode 0x8B + ModRM = 2 bytes).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 float register to a stack slot.  If the value is not already
// on top of the FP stack it is first FLD'ed (pushed) and then stored with a
// popping store (FSTP); if it is already TOS a non-popping FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // EBX_num/EDX_num are used here only for their encodings (3 and 2), which
  // select the FSTP (store & pop) vs FST (store, no pop) ModRM reg field.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op     = 0xDD;
  } else {                                         // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op     = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
954 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 955 int src_hi, int dst_hi, uint ireg, outputStream* st); 956 957 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 958 int stack_offset, int reg, uint ireg, outputStream* st); 959 960 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 961 int dst_offset, uint ireg, outputStream* st) { 962 int calc_size = 0; 963 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 964 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 965 switch (ireg) { 966 case Op_VecS: 967 calc_size = 3+src_offset_size + 3+dst_offset_size; 968 break; 969 case Op_VecD: 970 calc_size = 3+src_offset_size + 3+dst_offset_size; 971 src_offset += 4; 972 dst_offset += 4; 973 src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 974 dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 975 calc_size += 3+src_offset_size + 3+dst_offset_size; 976 break; 977 case Op_VecX: 978 case Op_VecY: 979 case Op_VecZ: 980 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 981 break; 982 default: 983 ShouldNotReachHere(); 984 } 985 if (cbuf) { 986 MacroAssembler _masm(cbuf); 987 int offset = __ offset(); 988 switch (ireg) { 989 case Op_VecS: 990 __ pushl(Address(rsp, src_offset)); 991 __ popl (Address(rsp, dst_offset)); 992 break; 993 case Op_VecD: 994 __ pushl(Address(rsp, src_offset)); 995 __ popl (Address(rsp, dst_offset)); 996 __ pushl(Address(rsp, src_offset+4)); 997 __ popl (Address(rsp, dst_offset+4)); 998 break; 999 case Op_VecX: 1000 __ movdqu(Address(rsp, -16), xmm0); 1001 __ movdqu(xmm0, Address(rsp, src_offset)); 1002 __ movdqu(Address(rsp, dst_offset), xmm0); 1003 __ movdqu(xmm0, Address(rsp, -16)); 1004 break; 1005 case Op_VecY: 1006 __ vmovdqu(Address(rsp, -32), xmm0); 1007 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1008 __ vmovdqu(Address(rsp, dst_offset), xmm0); 
1009 __ vmovdqu(xmm0, Address(rsp, -32)); 1010 case Op_VecZ: 1011 __ evmovdqul(Address(rsp, -64), xmm0, 2); 1012 __ evmovdqul(xmm0, Address(rsp, src_offset), 2); 1013 __ evmovdqul(Address(rsp, dst_offset), xmm0, 2); 1014 __ evmovdqul(xmm0, Address(rsp, -64), 2); 1015 break; 1016 default: 1017 ShouldNotReachHere(); 1018 } 1019 int size = __ offset() - offset; 1020 assert(size == calc_size, "incorrect size calculattion"); 1021 return size; 1022 #ifndef PRODUCT 1023 } else if (!do_size) { 1024 switch (ireg) { 1025 case Op_VecS: 1026 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1027 "popl [rsp + #%d]", 1028 src_offset, dst_offset); 1029 break; 1030 case Op_VecD: 1031 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1032 "popq [rsp + #%d]\n\t" 1033 "pushl [rsp + #%d]\n\t" 1034 "popq [rsp + #%d]", 1035 src_offset, dst_offset, src_offset+4, dst_offset+4); 1036 break; 1037 case Op_VecX: 1038 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1039 "movdqu xmm0, [rsp + #%d]\n\t" 1040 "movdqu [rsp + #%d], xmm0\n\t" 1041 "movdqu xmm0, [rsp - #16]", 1042 src_offset, dst_offset); 1043 break; 1044 case Op_VecY: 1045 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1046 "vmovdqu xmm0, [rsp + #%d]\n\t" 1047 "vmovdqu [rsp + #%d], xmm0\n\t" 1048 "vmovdqu xmm0, [rsp - #32]", 1049 src_offset, dst_offset); 1050 case Op_VecZ: 1051 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1052 "vmovdqu xmm0, [rsp + #%d]\n\t" 1053 "vmovdqu [rsp + #%d], xmm0\n\t" 1054 "vmovdqu xmm0, [rsp - #64]", 1055 src_offset, dst_offset); 1056 break; 1057 default: 1058 ShouldNotReachHere(); 1059 } 1060 #endif 1061 } 1062 return calc_size; 1063 } 1064 1065 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1066 // Get registers to move 1067 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1068 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1069 OptoReg::Name 
dst_second = ra_->get_reg_second(this ); 1070 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1071 1072 enum RC src_second_rc = rc_class(src_second); 1073 enum RC src_first_rc = rc_class(src_first); 1074 enum RC dst_second_rc = rc_class(dst_second); 1075 enum RC dst_first_rc = rc_class(dst_first); 1076 1077 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1078 1079 // Generate spill code! 1080 int size = 0; 1081 1082 if( src_first == dst_first && src_second == dst_second ) 1083 return size; // Self copy, no move 1084 1085 if (bottom_type()->isa_vect() != NULL) { 1086 uint ireg = ideal_reg(); 1087 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1088 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1089 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1090 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1091 // mem -> mem 1092 int src_offset = ra_->reg2offset(src_first); 1093 int dst_offset = ra_->reg2offset(dst_first); 1094 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1095 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1096 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1097 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1098 int stack_offset = ra_->reg2offset(dst_first); 1099 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1100 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1101 int stack_offset = ra_->reg2offset(src_first); 1102 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1103 } else { 1104 ShouldNotReachHere(); 1105 } 1106 } 1107 1108 // -------------------------------------- 1109 // Check for mem-mem move. push/pop to move. 
1110 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1111 if( src_second == dst_first ) { // overlapping stack copy ranges 1112 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1113 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1114 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1115 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1116 } 1117 // move low bits 1118 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1119 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1120 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1121 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1122 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1123 } 1124 return size; 1125 } 1126 1127 // -------------------------------------- 1128 // Check for integer reg-reg copy 1129 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1130 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1131 1132 // Check for integer store 1133 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1134 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1135 1136 // Check for integer load 1137 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1138 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1139 1140 // Check for integer reg-xmm reg copy 1141 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1142 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1143 "no 64 bit integer-float reg moves" ); 1144 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1145 } 1146 // -------------------------------------- 1147 // Check for float reg-reg copy 1148 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1149 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1150 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1151 if( cbuf ) { 1152 1153 // Note the mucking with the register encode to compensate for the 0/1 1154 // indexing issue mentioned in a comment in the reg_def sections 1155 // for FPR registers many lines above here. 1156 1157 if( src_first != FPR1L_num ) { 1158 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1159 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1160 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1161 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1162 } else { 1163 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1164 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1165 } 1166 #ifndef PRODUCT 1167 } else if( !do_size ) { 1168 if( size != 0 ) st->print("\n\t"); 1169 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1170 else st->print( "FST %s", Matcher::regName[dst_first]); 1171 #endif 1172 } 1173 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1174 } 1175 1176 // Check for float store 1177 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1178 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1179 } 1180 1181 // Check for float load 1182 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1183 int offset = ra_->reg2offset(src_first); 1184 const char *op_str; 1185 int op; 1186 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1187 op_str = "FLD_D"; 1188 op = 0xDD; 1189 } else { // 32-bit load 1190 op_str = "FLD_S"; 1191 op = 0xD9; 1192 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1193 } 1194 if( cbuf ) { 1195 emit_opcode (*cbuf, op ); 1196 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1197 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1198 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1199 #ifndef PRODUCT 1200 } else if( !do_size ) { 1201 if( size != 0 ) st->print("\n\t"); 1202 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1203 #endif 1204 } 1205 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1206 return size + 3+offset_size+2; 1207 } 1208 1209 // Check for xmm reg-reg copy 1210 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1211 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1212 (src_first+1 == src_second && dst_first+1 == dst_second), 1213 "no non-adjacent float-moves" ); 1214 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1215 } 1216 1217 // Check for xmm reg-integer reg copy 1218 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1219 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1220 "no 64 bit float-integer reg moves" ); 1221 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1222 } 1223 1224 // Check for xmm store 1225 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1226 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1227 } 1228 1229 // Check for float xmm load 1230 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1231 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1232 } 1233 1234 // Copy from float reg to xmm reg 1235 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1236 
// copy to the top of stack from floating point reg 1237 // and use LEA to preserve flags 1238 if( cbuf ) { 1239 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1240 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1241 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1242 emit_d8(*cbuf,0xF8); 1243 #ifndef PRODUCT 1244 } else if( !do_size ) { 1245 if( size != 0 ) st->print("\n\t"); 1246 st->print("LEA ESP,[ESP-8]"); 1247 #endif 1248 } 1249 size += 4; 1250 1251 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1252 1253 // Copy from the temp memory to the xmm reg. 1254 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1255 1256 if( cbuf ) { 1257 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1258 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1259 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1260 emit_d8(*cbuf,0x08); 1261 #ifndef PRODUCT 1262 } else if( !do_size ) { 1263 if( size != 0 ) st->print("\n\t"); 1264 st->print("LEA ESP,[ESP+8]"); 1265 #endif 1266 } 1267 size += 4; 1268 return size; 1269 } 1270 1271 assert( size > 0, "missed a case" ); 1272 1273 // -------------------------------------------------------------------- 1274 // Check for second bits still needing moving. 
1275 if( src_second == dst_second ) 1276 return size; // Self copy; no move 1277 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1278 1279 // Check for second word int-int move 1280 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1281 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1282 1283 // Check for second word integer store 1284 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1285 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1286 1287 // Check for second word integer load 1288 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1289 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1290 1291 1292 Unimplemented(); 1293 return 0; // Mute compiler 1294 } 1295 1296 #ifndef PRODUCT 1297 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1298 implementation( NULL, ra_, false, st ); 1299 } 1300 #endif 1301 1302 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1303 implementation( &cbuf, ra_, false, NULL ); 1304 } 1305 1306 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1307 return implementation( NULL, ra_, true, NULL ); 1308 } 1309 1310 1311 //============================================================================= 1312 #ifndef PRODUCT 1313 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1314 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1315 int reg = ra_->get_reg_first(this); 1316 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1317 } 1318 #endif 1319 1320 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1321 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1322 int reg = ra_->get_encode(this); 1323 if( offset >= 128 ) { 1324 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1325 emit_rm(cbuf, 0x2, reg, 
0x04); 1326 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1327 emit_d32(cbuf, offset); 1328 } 1329 else { 1330 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1331 emit_rm(cbuf, 0x1, reg, 0x04); 1332 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1333 emit_d8(cbuf, offset); 1334 } 1335 } 1336 1337 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1338 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1339 if( offset >= 128 ) { 1340 return 7; 1341 } 1342 else { 1343 return 4; 1344 } 1345 } 1346 1347 //============================================================================= 1348 #ifndef PRODUCT 1349 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1350 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1351 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1352 st->print_cr("\tNOP"); 1353 st->print_cr("\tNOP"); 1354 if( !OptoBreakpoint ) 1355 st->print_cr("\tNOP"); 1356 } 1357 #endif 1358 1359 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1360 MacroAssembler masm(&cbuf); 1361 #ifdef ASSERT 1362 uint insts_size = cbuf.insts_size(); 1363 #endif 1364 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1365 masm.jump_cc(Assembler::notEqual, 1366 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1367 /* WARNING these NOPs are critical so that verified entry point is properly 1368 aligned for patching by NativeJump::patch_verified_entry() */ 1369 int nops_cnt = 2; 1370 if( !OptoBreakpoint ) // Leave space for int3 1371 nops_cnt += 1; 1372 masm.nop(nops_cnt); 1373 1374 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1375 } 1376 1377 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1378 return OptoBreakpoint ? 
11 : 12; 1379 } 1380 1381 1382 //============================================================================= 1383 1384 int Matcher::regnum_to_fpu_offset(int regnum) { 1385 return regnum - 32; // The FP registers are in the second chunk 1386 } 1387 1388 // This is UltraSparc specific, true just means we have fast l2f conversion 1389 const bool Matcher::convL2FSupported(void) { 1390 return true; 1391 } 1392 1393 // Is this branch offset short enough that a short branch can be used? 1394 // 1395 // NOTE: If the platform does not provide any short branch variants, then 1396 // this method should return false for offset 0. 1397 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1398 // The passed offset is relative to address of the branch. 1399 // On 86 a branch displacement is calculated relative to address 1400 // of a next instruction. 1401 offset -= br_size; 1402 1403 // the short version of jmpConUCF2 contains multiple branches, 1404 // making the reach slightly less 1405 if (rule == jmpConUCF2_rule) 1406 return (-126 <= offset && offset <= 125); 1407 return (-128 <= offset && offset <= 127); 1408 } 1409 1410 const bool Matcher::isSimpleConstant64(jlong value) { 1411 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1412 return false; 1413 } 1414 1415 // The ecx parameter to rep stos for the ClearArray node is in dwords. 1416 const bool Matcher::init_array_count_is_in_bytes = false; 1417 1418 // Threshold size for cleararray. 1419 const int Matcher::init_array_short_size = 8 * BytesPerLong; 1420 1421 // Needs 2 CMOV's for longs. 1422 const int Matcher::long_cmove_cost() { return 1; } 1423 1424 // No CMOVF/CMOVD with SSE/SSE2 1425 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1426 1427 // Does the CPU require late expand (see block.cpp for description of late expand)? 
// x86 does not need post-register-allocation expansion of nodes.
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// 32-bit VM has no compressed oops; this query must never be reached.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

// 32-bit VM has no compressed klass pointers; this query must never be reached.
bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
1460 const bool Matcher::misaligned_doubles_ok = true; 1461 1462 1463 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { 1464 // Get the memory operand from the node 1465 uint numopnds = node->num_opnds(); // Virtual call for number of operands 1466 uint skipped = node->oper_input_base(); // Sum of leaves skipped so far 1467 assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" ); 1468 uint opcnt = 1; // First operand 1469 uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand 1470 while( idx >= skipped+num_edges ) { 1471 skipped += num_edges; 1472 opcnt++; // Bump operand count 1473 assert( opcnt < numopnds, "Accessing non-existent operand" ); 1474 num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand 1475 } 1476 1477 MachOper *memory = node->_opnds[opcnt]; 1478 MachOper *new_memory = NULL; 1479 switch (memory->opcode()) { 1480 case DIRECT: 1481 case INDOFFSET32X: 1482 // No transformation necessary. 1483 return; 1484 case INDIRECT: 1485 new_memory = new indirect_win95_safeOper( ); 1486 break; 1487 case INDOFFSET8: 1488 new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0)); 1489 break; 1490 case INDOFFSET32: 1491 new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0)); 1492 break; 1493 case INDINDEXOFFSET: 1494 new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0)); 1495 break; 1496 case INDINDEXSCALE: 1497 new_memory = new indIndexScale_win95_safeOper(memory->scale()); 1498 break; 1499 case INDINDEXSCALEOFFSET: 1500 new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0)); 1501 break; 1502 case LOAD_LONG_INDIRECT: 1503 case LOAD_LONG_INDOFFSET32: 1504 // Does not use EBP as address register, use { EDX, EBX, EDI, ESI} 1505 return; 1506 default: 1507 assert(false, "unexpected memory operand in pd_implicit_null_fixup()"); 1508 return; 1509 } 1510 node->_opnds[opcnt] = new_memory; 1511 } 1512 1513 // Advertise 
here if the CPU requires explicit rounding operations 1514 // to implement the UseStrictFP mode. 1515 const bool Matcher::strict_fp_requires_explicit_rounding = true; 1516 1517 // Are floats conerted to double when stored to stack during deoptimization? 1518 // On x32 it is stored with convertion only when FPU is used for floats. 1519 bool Matcher::float_in_double() { return (UseSSE == 0); } 1520 1521 // Do ints take an entire long register or just half? 1522 const bool Matcher::int_in_long = false; 1523 1524 // Return whether or not this register is ever used as an argument. This 1525 // function is used on startup to build the trampoline stubs in generateOptoStub. 1526 // Registers not mentioned will be killed by the VM call in the trampoline, and 1527 // arguments in those registers not be available to the callee. 1528 bool Matcher::can_be_java_arg( int reg ) { 1529 if( reg == ECX_num || reg == EDX_num ) return true; 1530 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; 1531 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1532 return false; 1533 } 1534 1535 bool Matcher::is_spillable_arg( int reg ) { 1536 return can_be_java_arg(reg); 1537 } 1538 1539 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { 1540 // Use hardware integer DIV instruction when 1541 // it is faster than a code which use multiply. 1542 // Only when constant divisor fits into 32 bit 1543 // (min_jint is excluded to get only correct 1544 // positive 32 bit values from negative). 
1545 return VM_Version::has_fast_idiv() && 1546 (divisor == (int)divisor && divisor != min_jint); 1547 } 1548 1549 // Register for DIVI projection of divmodI 1550 RegMask Matcher::divI_proj_mask() { 1551 return EAX_REG_mask(); 1552 } 1553 1554 // Register for MODI projection of divmodI 1555 RegMask Matcher::modI_proj_mask() { 1556 return EDX_REG_mask(); 1557 } 1558 1559 // Register for DIVL projection of divmodL 1560 RegMask Matcher::divL_proj_mask() { 1561 ShouldNotReachHere(); 1562 return RegMask(); 1563 } 1564 1565 // Register for MODL projection of divmodL 1566 RegMask Matcher::modL_proj_mask() { 1567 ShouldNotReachHere(); 1568 return RegMask(); 1569 } 1570 1571 const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1572 return NO_REG_mask(); 1573 } 1574 1575 // Returns true if the high 32 bits of the value is known to be zero. 1576 bool is_operand_hi32_zero(Node* n) { 1577 int opc = n->Opcode(); 1578 if (opc == Op_AndL) { 1579 Node* o2 = n->in(2); 1580 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1581 return true; 1582 } 1583 } 1584 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1585 return true; 1586 } 1587 return false; 1588 } 1589 1590 %} 1591 1592 //----------ENCODING BLOCK----------------------------------------------------- 1593 // This block specifies the encoding classes used by the compiler to output 1594 // byte streams. Encoding classes generate functions which are called by 1595 // Machine Instruction Nodes in order to generate the bit encoding of the 1596 // instruction. Operands specify their base encoding interface with the 1597 // interface keyword. There are currently supported four interfaces, 1598 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1599 // operand to generate a function which returns its register number when 1600 // queried. CONST_INTER causes an operand to generate a function which 1601 // returns the value of the constant when queried. 
MEMORY_INTER causes an 1602 // operand to generate four functions which return the Base Register, the 1603 // Index Register, the Scale Value, and the Offset Value of the operand when 1604 // queried. COND_INTER causes an operand to generate six functions which 1605 // return the encoding code (ie - encoding bits for the instruction) 1606 // associated with each basic boolean condition for a conditional instruction. 1607 // Instructions specify two basic values for encoding. They use the 1608 // ins_encode keyword to specify their encoding class (which must be one of 1609 // the class names specified in the encoding block), and they use the 1610 // opcode keyword to specify, in order, their primary, secondary, and 1611 // tertiary opcode. Only the opcode sections which a particular instruction 1612 // needs for encoding need to be specified. 1613 encode %{ 1614 // Build emit functions for each basic byte or larger field in the intel 1615 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1616 // code in the enc_class source block. Emit functions will live in the 1617 // main source block for now. 
In future, we can generalize this by 1618 // adding a syntax that specifies the sizes of fields in an order, 1619 // so that the adlc can build the emit functions automagically 1620 1621 // Emit primary opcode 1622 enc_class OpcP %{ 1623 emit_opcode(cbuf, $primary); 1624 %} 1625 1626 // Emit secondary opcode 1627 enc_class OpcS %{ 1628 emit_opcode(cbuf, $secondary); 1629 %} 1630 1631 // Emit opcode directly 1632 enc_class Opcode(immI d8) %{ 1633 emit_opcode(cbuf, $d8$$constant); 1634 %} 1635 1636 enc_class SizePrefix %{ 1637 emit_opcode(cbuf,0x66); 1638 %} 1639 1640 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1641 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1642 %} 1643 1644 enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many) 1645 emit_opcode(cbuf,$opcode$$constant); 1646 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1647 %} 1648 1649 enc_class mov_r32_imm0( rRegI dst ) %{ 1650 emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 1651 emit_d32 ( cbuf, 0x0 ); // imm32==0x0 1652 %} 1653 1654 enc_class cdq_enc %{ 1655 // Full implementation of Java idiv and irem; checks for 1656 // special case as described in JVM spec., p.243 & p.271. 
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                           -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80    cmp         rax,80000000h
    // 0F 85 0B 00 00 00    jne         normal_case
    // 33 D2                xor         rdx,edx
    // 83 F9 FF             cmp         rcx,0FFh
    // 0F 84 03 00 00 00    je          done
    //                  normal_case:
    // 99                   cdq
    // F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops: fold the register number into
  // the opcode byte itself.
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  // Same as OpcSE but also emits the mod-r/m byte ($secondary is the
  // reg/opcode field).
  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02); }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  // Emit an 8- or 32-bit immediate, matching the size the opcode
  // (emitted by OpcSE/OpcSErm) selected.
  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low 32 bits of a long immediate op: opcode + r/m + 8/32-bit immediate.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // High 32 bits of a long immediate op; targets the high register of the
  // pair and uses $tertiary as the r/m reg/opcode field.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // BSWAP: opcode byte built from $secondary plus the register encoding.
  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit value held in a register pair:
  // BSWAP each half, then XCHG the halves.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  // Single-operand r/m form ($secondary selects the operation).
  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  // CMOVcc: condition code from the cmpOp operand folded into $secondary.
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // FCMOVcc variant for an x87 double register source.
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  // Slow-path subtype check: ESI = sub class, EAX = super class,
  // result in EDI, ECX killed.  If $primary is set, EDI is cleared on the
  // fall-through (non-miss) path; the miss label is bound after the clear.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  // Ensure the x87 stack is empty before a C call.  Must emit the same
  // number of bytes every time (size is recorded on first use and
  // asserted afterwards).
  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    // First emission records the fixed size; all later emissions must match.
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  // Debug-mode FPU-stack sanity check after a runtime leaf call.
  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // Move the x87 float return value to xmm0 via a stack temp.
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        // Move the x87 double return value to xmm0 via a stack temp.
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}


  // Fix up FPU control word / YMM state before a call; emitted size must
  // match pre_call_resets_size() (asserted in debug builds).
  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if (ra_->C->in_24_bit_fp_mode()) {
      MacroAssembler _masm(&cbuf);
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    if (ra_->C->max_vector_size() > 16) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      MacroAssembler _masm(&cbuf);
      __ vzeroupper();
    }
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  // Restore the 24-bit-precision FPU control word after a call, if this
  // method runs in 24-bit mode.
  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    if (!_method) {
      // Runtime stub call: no Java method attached.
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(), RELOC_IMM32 );
    } else if (_optimized_virtual) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
    } else {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     static_call_Relocation::spec(), RELOC_IMM32 );
    }
    if (_method) {  // Emit stub for static call.
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      // Bail out of this compile if the code cache cannot hold the stub.
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  %}

  // Inline-cache dispatch; the MacroAssembler emits the IC call sequence.
  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method);
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);             // Displacement

  %}

  // Following encoding is no longer used, but may be restored if calling
  // convention changes significantly.
  // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  // enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //   // int ic_reg     = Matcher::inline_cache_reg();
  //   // int ic_encode  = Matcher::_regEncode[ic_reg];
  //   // int imo_reg    = Matcher::interpreter_method_oop_reg();
  //   // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //   // // Interpreter expects method_oop in EBX, currently a callee-saved register,
  //   // // so we load it immediately before the call
  //   // emit_opcode(cbuf, 0x8B);  // MOV    imo_reg,ic_reg  # method_oop
  //   // emit_rm(cbuf, 0x03, imo_encode, ic_encode );  // R/M byte
  //
  //   // xor rbp,ebp
  //   emit_opcode(cbuf, 0x33);
  //   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //   // CALL to interpreter.
  //   cbuf.set_insts_mark();
  //   $$$emit8$primary;
  //   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //               runtime_call_Relocation::spec(), RELOC_IMM32 );
  // %}

  // Shift by an 8-bit immediate count.
  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the low 32 bits of a long immediate; zero is materialized with
  // the shorter XOR idiom.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the high 32 bits of a long immediate into the pair's high
  // register (dst_enc + 2); zero uses the shorter XOR idiom.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Copy the low word of a long pair into an int register.
  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // NOTE(review): duplicate of the RegReg enc_class defined earlier in
  // this encode block — candidate for removal in a cleanup change.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // Like RegReg_Lo/Hi but without the opcode byte (caller emits it).
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // r/m byte pairing an int register with the high half of a long pair.
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  // Absolute [disp32] memory reference (mod=00, r/m=101), displacement 0.
  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // Emit the LOCK prefix only on multiprocessor systems.
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // 32-bit compare-and-exchange: [LOCK] CMPXCHG [mem_ptr].
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize the Z flag as a boolean: res = ZF ? 1 : 0.
  // MOV does not touch flags; the short JNE (+5) skips the MOV res,1.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  // Generic reg,mem addressing-mode encoding (base/index/scale/disp).
  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // Same as RegMem but addresses the high word: high register of the
  // pair, displacement + 4.
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Long shift by a constant 1..31: SHLD/SHRD ($tertiary selects which)
  // across the pair, then a single shift of the other half.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by a constant 32..63: move hi to lo,
  // shift, then SAR hi by 31 to propagate the sign.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      // NOTE(review): opcode byte emitted via emit_d8 here (emit_opcode
      // elsewhere) — presumably equivalent single-byte emission; verify.
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical long shift by a constant 32..63: move one half into the
  // other, shift the remainder, zero the vacated half.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33); // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    // disp_for_half selects which half of the double (offset added to disp).
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  // Same as above but the displacement may carry relocation info.
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // LEA dst, [src0 + src1]: address arithmetic with no index register.
  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // MIN: compare, then a 2-byte short branch skips the MOV when dst < src.
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MAX: same shape as MIN, branching when dst > src.
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;           // Store & pop
      emit_opcode( cbuf, 0xD9 );    // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();          // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // Two's-complement negate: NEG r/m32 (F7 /3).
  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  // SETL: set byte register to 1 if the "less" condition holds.
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free conditional add: p -= q; tmp = borrow ? -1 : 0 (SBB of
  // a register with itself); p += (tmp & y).
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Long left shift by CL: if shift >= 32 (bit 5 of CL set), move lo into
  // hi and clear lo first; then SHLD/SHL handle shift mod 32.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL  $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Long logical right shift by CL (mirror of shift_left_long).
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR  $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Long arithmetic right shift by CL: sign fills via SAR $dst.hi,31 on
  // the >=32 path (5-byte skip instead of 4).
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!!  equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // strictfp scaling: multiply by a bias constant (80-bit load from a
  // stub-routine address) to keep subnormal results in range.
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Second strictfp bias multiply (inverse scaling of bias1 — see the
  // addr_fpu_subnormal_bias2 stub).
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);   // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;                      // FST (no pop) when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: pop the copy we just pushed
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to
  // a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  // Load dst to TOS; if src is not FPR1, temporarily rotate the stack
  // (fincstp / FXCH / fdecstp) so src lands where the caller expects.
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Move two XMM doubles onto the x87 stack (src1 first, then src0 on
  // top) via an 8-byte stack temp.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Float variant of Push_ModD_encoding (4-byte stack temp).
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 TOS double into an XMM register and release the temp.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Pop the x87 TOS float into an XMM register; d8 is the temp size to
  // release.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Move one XMM double onto the x87 stack via an 8-byte stack temp.
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Copy an XMM double to the x87 TOS; assumes a stack temp is already
  // reserved (see push_stack_temp_qword).
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Rotate the x87 stack so the result in src becomes FPR1.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy x87 status to EFLAGS; a 5-byte short JNP skips the unordered
  // fixup that the user of this rule emits next.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  // FPREM loop: repeat until the C2 status bit (reduction complete)
  // clears; JP branches back -12 bytes to the fprem.
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // Convert x87 compare status into integer flags, treating unordered
  // (NaN) results as "less than" by forcing the carry flag.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  //  fnstsw_ax();
  //  sahf();
  //  movl(dst, nan_result);
  //  jcc(Assembler::parity, exit);
  //  movl(dst, less_result);
  //  jcc(Assembler::below, exit);
  //  movl(dst, equal_result);
  //  jcc(Assembler::equal, exit);
  //  movl(dst, greater_result);

  //  less_result     =  1;
  //  greater_result  = -1;
  //  equal_result    = 0;
  //  nan_result      = -1;

  // Materialize a three-way FP compare result (-1/0/1, NaN -> -1) per the
  // pseudo-code above.  (Encoding continues beyond this view.)
  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
2707 emit_d8 ( cbuf, 0x0C ); 2708 // movl(dst, equal_result); 2709 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2710 emit_d32( cbuf, 0 ); 2711 // jcc(Assembler::equal, exit); 2712 emit_opcode( cbuf, 0x74 ); 2713 emit_d8 ( cbuf, 0x05 ); 2714 // movl(dst, greater_result); 2715 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2716 emit_d32( cbuf, 1 ); 2717 %} 2718 2719 2720 // Compare the longs and set flags 2721 // BROKEN! Do Not use as-is 2722 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2723 // CMP $src1.hi,$src2.hi 2724 emit_opcode( cbuf, 0x3B ); 2725 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2726 // JNE,s done 2727 emit_opcode(cbuf,0x75); 2728 emit_d8(cbuf, 2 ); 2729 // CMP $src1.lo,$src2.lo 2730 emit_opcode( cbuf, 0x3B ); 2731 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2732 // done: 2733 %} 2734 2735 enc_class convert_int_long( regL dst, rRegI src ) %{ 2736 // mov $dst.lo,$src 2737 int dst_encoding = $dst$$reg; 2738 int src_encoding = $src$$reg; 2739 encode_Copy( cbuf, dst_encoding , src_encoding ); 2740 // mov $dst.hi,$src 2741 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2742 // sar $dst.hi,31 2743 emit_opcode( cbuf, 0xC1 ); 2744 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2745 emit_d8(cbuf, 0x1F ); 2746 %} 2747 2748 enc_class convert_long_double( eRegL src ) %{ 2749 // push $src.hi 2750 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2751 // push $src.lo 2752 emit_opcode(cbuf, 0x50+$src$$reg ); 2753 // fild 64-bits at [SP] 2754 emit_opcode(cbuf,0xdf); 2755 emit_d8(cbuf, 0x6C); 2756 emit_d8(cbuf, 0x24); 2757 emit_d8(cbuf, 0x00); 2758 // pop stack 2759 emit_opcode(cbuf, 0x83); // add SP, #8 2760 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2761 emit_d8(cbuf, 0x8); 2762 %} 2763 2764 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2765 // IMUL EDX:EAX,$src1 2766 emit_opcode( cbuf, 0xF7 ); 2767 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2768 // SAR 
EDX,$cnt-32 2769 int shift_count = ((int)$cnt$$constant) - 32; 2770 if (shift_count > 0) { 2771 emit_opcode(cbuf, 0xC1); 2772 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2773 emit_d8(cbuf, shift_count); 2774 } 2775 %} 2776 2777 // this version doesn't have add sp, 8 2778 enc_class convert_long_double2( eRegL src ) %{ 2779 // push $src.hi 2780 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2781 // push $src.lo 2782 emit_opcode(cbuf, 0x50+$src$$reg ); 2783 // fild 64-bits at [SP] 2784 emit_opcode(cbuf,0xdf); 2785 emit_d8(cbuf, 0x6C); 2786 emit_d8(cbuf, 0x24); 2787 emit_d8(cbuf, 0x00); 2788 %} 2789 2790 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2791 // Basic idea: long = (long)int * (long)int 2792 // IMUL EDX:EAX, src 2793 emit_opcode( cbuf, 0xF7 ); 2794 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2795 %} 2796 2797 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2798 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2799 // MUL EDX:EAX, src 2800 emit_opcode( cbuf, 0xF7 ); 2801 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2802 %} 2803 2804 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2805 // Basic idea: lo(result) = lo(x_lo * y_lo) 2806 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2807 // MOV $tmp,$src.lo 2808 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2809 // IMUL $tmp,EDX 2810 emit_opcode( cbuf, 0x0F ); 2811 emit_opcode( cbuf, 0xAF ); 2812 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2813 // MOV EDX,$src.hi 2814 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2815 // IMUL EDX,EAX 2816 emit_opcode( cbuf, 0x0F ); 2817 emit_opcode( cbuf, 0xAF ); 2818 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2819 // ADD $tmp,EDX 2820 emit_opcode( cbuf, 0x03 ); 2821 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2822 // MUL EDX:EAX,$src.lo 2823 emit_opcode( cbuf, 0xF7 ); 2824 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2825 // ADD EDX,ESI 2826 emit_opcode( 
cbuf, 0x03 ); 2827 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2828 %} 2829 2830 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2831 // Basic idea: lo(result) = lo(src * y_lo) 2832 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2833 // IMUL $tmp,EDX,$src 2834 emit_opcode( cbuf, 0x6B ); 2835 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2836 emit_d8( cbuf, (int)$src$$constant ); 2837 // MOV EDX,$src 2838 emit_opcode(cbuf, 0xB8 + EDX_enc); 2839 emit_d32( cbuf, (int)$src$$constant ); 2840 // MUL EDX:EAX,EDX 2841 emit_opcode( cbuf, 0xF7 ); 2842 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2843 // ADD EDX,ESI 2844 emit_opcode( cbuf, 0x03 ); 2845 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2846 %} 2847 2848 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2849 // PUSH src1.hi 2850 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2851 // PUSH src1.lo 2852 emit_opcode(cbuf, 0x50+$src1$$reg ); 2853 // PUSH src2.hi 2854 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2855 // PUSH src2.lo 2856 emit_opcode(cbuf, 0x50+$src2$$reg ); 2857 // CALL directly to the runtime 2858 cbuf.set_insts_mark(); 2859 emit_opcode(cbuf,0xE8); // Call into runtime 2860 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2861 // Restore stack 2862 emit_opcode(cbuf, 0x83); // add SP, #framesize 2863 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2864 emit_d8(cbuf, 4*4); 2865 %} 2866 2867 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2868 // PUSH src1.hi 2869 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2870 // PUSH src1.lo 2871 emit_opcode(cbuf, 0x50+$src1$$reg ); 2872 // PUSH src2.hi 2873 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2874 // PUSH src2.lo 2875 emit_opcode(cbuf, 0x50+$src2$$reg ); 2876 // CALL directly to the runtime 2877 cbuf.set_insts_mark(); 2878 emit_opcode(cbuf,0xE8); // Call into runtime 2879 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2880 // Restore stack 2881 emit_opcode(cbuf, 0x83); // add SP, #framesize 2882 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2883 emit_d8(cbuf, 4*4); 2884 %} 2885 2886 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2887 // MOV $tmp,$src.lo 2888 emit_opcode(cbuf, 0x8B); 2889 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2890 // OR $tmp,$src.hi 2891 emit_opcode(cbuf, 0x0B); 2892 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2893 %} 2894 2895 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2896 // CMP $src1.lo,$src2.lo 2897 emit_opcode( cbuf, 0x3B ); 2898 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2899 // JNE,s skip 2900 emit_cc(cbuf, 0x70, 0x5); 2901 emit_d8(cbuf,2); 2902 // CMP $src1.hi,$src2.hi 2903 emit_opcode( cbuf, 0x3B ); 2904 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2905 %} 2906 2907 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2908 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2909 emit_opcode( cbuf, 0x3B ); 2910 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2911 // MOV $tmp,$src1.hi 2912 emit_opcode( cbuf, 0x8B ); 2913 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2914 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2915 emit_opcode( cbuf, 0x1B ); 2916 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2917 %} 2918 2919 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2920 // XOR $tmp,$tmp 2921 emit_opcode(cbuf,0x33); // XOR 2922 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2923 // CMP $tmp,$src.lo 2924 emit_opcode( cbuf, 0x3B ); 2925 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2926 // SBB $tmp,$src.hi 2927 emit_opcode( cbuf, 0x1B ); 2928 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2929 %} 2930 2931 // Sniff, sniff... 
smells like Gnu Superoptimizer 2932 enc_class neg_long( eRegL dst ) %{ 2933 emit_opcode(cbuf,0xF7); // NEG hi 2934 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2935 emit_opcode(cbuf,0xF7); // NEG lo 2936 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2937 emit_opcode(cbuf,0x83); // SBB hi,0 2938 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2939 emit_d8 (cbuf,0 ); 2940 %} 2941 2942 enc_class enc_pop_rdx() %{ 2943 emit_opcode(cbuf,0x5A); 2944 %} 2945 2946 enc_class enc_rethrow() %{ 2947 cbuf.set_insts_mark(); 2948 emit_opcode(cbuf, 0xE9); // jmp entry 2949 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2950 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2951 %} 2952 2953 2954 // Convert a double to an int. Java semantics require we do complex 2955 // manglelations in the corner cases. So we set the rounding mode to 2956 // 'zero', store the darned double down as an int, and reset the 2957 // rounding mode to 'nearest'. The hardware throws an exception which 2958 // patches up the correct value directly to the stack. 2959 enc_class DPR2I_encoding( regDPR src ) %{ 2960 // Flip to round-to-zero mode. We attempted to allow invalid-op 2961 // exceptions here, so that a NAN or other corner-case value will 2962 // thrown an exception (but normal values get converted at full speed). 2963 // However, I2C adapters and other float-stack manglers leave pending 2964 // invalid-op exceptions hanging. We would have to clear them before 2965 // enabling them and that is more expensive than just testing for the 2966 // invalid value Intel stores down in the corner cases. 2967 emit_opcode(cbuf,0xD9); // FLDCW trunc 2968 emit_opcode(cbuf,0x2D); 2969 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 2970 // Allocate a word 2971 emit_opcode(cbuf,0x83); // SUB ESP,4 2972 emit_opcode(cbuf,0xEC); 2973 emit_d8(cbuf,0x04); 2974 // Encoding assumes a double has been pushed into FPR0. 
2975 // Store down the double as an int, popping the FPU stack 2976 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2977 emit_opcode(cbuf,0x1C); 2978 emit_d8(cbuf,0x24); 2979 // Restore the rounding mode; mask the exception 2980 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2981 emit_opcode(cbuf,0x2D); 2982 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2983 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 2984 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 2985 2986 // Load the converted int; adjust CPU stack 2987 emit_opcode(cbuf,0x58); // POP EAX 2988 emit_opcode(cbuf,0x3D); // CMP EAX,imm 2989 emit_d32 (cbuf,0x80000000); // 0x80000000 2990 emit_opcode(cbuf,0x75); // JNE around_slow_call 2991 emit_d8 (cbuf,0x07); // Size of slow_call 2992 // Push src onto stack slow-path 2993 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2994 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2995 // CALL directly to the runtime 2996 cbuf.set_insts_mark(); 2997 emit_opcode(cbuf,0xE8); // Call into runtime 2998 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2999 // Carry on here... 3000 %} 3001 3002 enc_class DPR2L_encoding( regDPR src ) %{ 3003 emit_opcode(cbuf,0xD9); // FLDCW trunc 3004 emit_opcode(cbuf,0x2D); 3005 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3006 // Allocate a word 3007 emit_opcode(cbuf,0x83); // SUB ESP,8 3008 emit_opcode(cbuf,0xEC); 3009 emit_d8(cbuf,0x08); 3010 // Encoding assumes a double has been pushed into FPR0. 3011 // Store down the double as a long, popping the FPU stack 3012 emit_opcode(cbuf,0xDF); // FISTP [ESP] 3013 emit_opcode(cbuf,0x3C); 3014 emit_d8(cbuf,0x24); 3015 // Restore the rounding mode; mask the exception 3016 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3017 emit_opcode(cbuf,0x2D); 3018 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3019 ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 3020 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3021 3022 // Load the converted int; adjust CPU stack 3023 emit_opcode(cbuf,0x58); // POP EAX 3024 emit_opcode(cbuf,0x5A); // POP EDX 3025 emit_opcode(cbuf,0x81); // CMP EDX,imm 3026 emit_d8 (cbuf,0xFA); // rdx 3027 emit_d32 (cbuf,0x80000000); // 0x80000000 3028 emit_opcode(cbuf,0x75); // JNE around_slow_call 3029 emit_d8 (cbuf,0x07+4); // Size of slow_call 3030 emit_opcode(cbuf,0x85); // TEST EAX,EAX 3031 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 3032 emit_opcode(cbuf,0x75); // JNE around_slow_call 3033 emit_d8 (cbuf,0x07); // Size of slow_call 3034 // Push src onto stack slow-path 3035 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3036 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3037 // CALL directly to the runtime 3038 cbuf.set_insts_mark(); 3039 emit_opcode(cbuf,0xE8); // Call into runtime 3040 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3041 // Carry on here... 
3042 %} 3043 3044 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 3045 // Operand was loaded from memory into fp ST (stack top) 3046 // FMUL ST,$src /* D8 C8+i */ 3047 emit_opcode(cbuf, 0xD8); 3048 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3049 %} 3050 3051 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3052 // FADDP ST,src2 /* D8 C0+i */ 3053 emit_opcode(cbuf, 0xD8); 3054 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3055 //could use FADDP src2,fpST /* DE C0+i */ 3056 %} 3057 3058 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3059 // FADDP src2,ST /* DE C0+i */ 3060 emit_opcode(cbuf, 0xDE); 3061 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3062 %} 3063 3064 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3065 // Operand has been loaded into fp ST (stack top) 3066 // FSUB ST,$src1 3067 emit_opcode(cbuf, 0xD8); 3068 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3069 3070 // FDIV 3071 emit_opcode(cbuf, 0xD8); 3072 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3073 %} 3074 3075 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3076 // Operand was loaded from memory into fp ST (stack top) 3077 // FADD ST,$src /* D8 C0+i */ 3078 emit_opcode(cbuf, 0xD8); 3079 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3080 3081 // FMUL ST,src2 /* D8 C*+i */ 3082 emit_opcode(cbuf, 0xD8); 3083 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3084 %} 3085 3086 3087 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3088 // Operand was loaded from memory into fp ST (stack top) 3089 // FADD ST,$src /* D8 C0+i */ 3090 emit_opcode(cbuf, 0xD8); 3091 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3092 3093 // FMULP src2,ST /* DE C8+i */ 3094 emit_opcode(cbuf, 0xDE); 3095 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3096 %} 3097 3098 // Atomically load the volatile long 3099 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3100 emit_opcode(cbuf,0xDF); 3101 int rm_byte_opcode = 0x05; 3102 int base = $mem$$base; 3103 int index = $mem$$index; 3104 int scale = $mem$$scale; 3105 int displace = $mem$$disp; 3106 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3107 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3108 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3109 %} 3110 3111 // Volatile Store Long. Must be atomic, so move it into 3112 // the FP TOS and then do a 64-bit FIST. Has to probe the 3113 // target address before the store (for null-ptr checks) 3114 // so the memory operand is used twice in the encoding. 3115 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3116 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3117 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3118 emit_opcode(cbuf,0xDF); 3119 int rm_byte_opcode = 0x07; 3120 int base = $mem$$base; 3121 int index = $mem$$index; 3122 int scale = $mem$$scale; 3123 int displace = $mem$$disp; 3124 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3125 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3126 %} 3127 3128 // Safepoint Poll. This polls the safepoint page, and causes an 3129 // exception if it is not readable. Unfortunately, it kills the condition code 3130 // in the process 3131 // We current use TESTL [spp],EDI 3132 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 3133 3134 enc_class Safepoint_Poll() %{ 3135 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); 3136 emit_opcode(cbuf,0x85); 3137 emit_rm (cbuf, 0x0, 0x7, 0x5); 3138 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 3139 %} 3140 %} 3141 3142 3143 //----------FRAME-------------------------------------------------------------- 3144 // Definition of frame structure and management information. 
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   |   (to get allocators register number
//  G  Owned by    |        |        v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t     -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        |  locks |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by    +--------+
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  // (lo/hi tables indexed by ideal register type; Bad entries are unused)
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer immediate that fits in a signed byte
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer immediate that fits in a signed 16-bit word
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in the 1..31 range
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in the 32..63 range (long shifts spilling into the high word)
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: the value -1 (all bits set)
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value fits in a signed 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

// (fragment) The next operand definition continues beyond this chunk.
operand
eCXRegI(xRegI reg) %{ 3721 constraint(ALLOC_IN_RC(ecx_reg)); 3722 match(reg); 3723 match(rRegI); 3724 3725 format %{ "ECX" %} 3726 interface(REG_INTER); 3727 %} 3728 3729 operand eDXRegI(xRegI reg) %{ 3730 constraint(ALLOC_IN_RC(edx_reg)); 3731 match(reg); 3732 match(rRegI); 3733 3734 format %{ "EDX" %} 3735 interface(REG_INTER); 3736 %} 3737 3738 operand eDIRegI(xRegI reg) %{ 3739 constraint(ALLOC_IN_RC(edi_reg)); 3740 match(reg); 3741 match(rRegI); 3742 3743 format %{ "EDI" %} 3744 interface(REG_INTER); 3745 %} 3746 3747 operand naxRegI() %{ 3748 constraint(ALLOC_IN_RC(nax_reg)); 3749 match(RegI); 3750 match(eCXRegI); 3751 match(eDXRegI); 3752 match(eSIRegI); 3753 match(eDIRegI); 3754 3755 format %{ %} 3756 interface(REG_INTER); 3757 %} 3758 3759 operand nadxRegI() %{ 3760 constraint(ALLOC_IN_RC(nadx_reg)); 3761 match(RegI); 3762 match(eBXRegI); 3763 match(eCXRegI); 3764 match(eSIRegI); 3765 match(eDIRegI); 3766 3767 format %{ %} 3768 interface(REG_INTER); 3769 %} 3770 3771 operand ncxRegI() %{ 3772 constraint(ALLOC_IN_RC(ncx_reg)); 3773 match(RegI); 3774 match(eAXRegI); 3775 match(eDXRegI); 3776 match(eSIRegI); 3777 match(eDIRegI); 3778 3779 format %{ %} 3780 interface(REG_INTER); 3781 %} 3782 3783 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3784 // // 3785 operand eSIRegI(xRegI reg) %{ 3786 constraint(ALLOC_IN_RC(esi_reg)); 3787 match(reg); 3788 match(rRegI); 3789 3790 format %{ "ESI" %} 3791 interface(REG_INTER); 3792 %} 3793 3794 // Pointer Register 3795 operand anyRegP() %{ 3796 constraint(ALLOC_IN_RC(any_reg)); 3797 match(RegP); 3798 match(eAXRegP); 3799 match(eBXRegP); 3800 match(eCXRegP); 3801 match(eDIRegP); 3802 match(eRegP); 3803 3804 format %{ %} 3805 interface(REG_INTER); 3806 %} 3807 3808 operand eRegP() %{ 3809 constraint(ALLOC_IN_RC(int_reg)); 3810 match(RegP); 3811 match(eAXRegP); 3812 match(eBXRegP); 3813 match(eCXRegP); 3814 match(eDIRegP); 3815 3816 format %{ %} 3817 interface(REG_INTER); 3818 %} 3819 3820 // 
On windows95, EBP is not safe to use for implicit null tests. 3821 operand eRegP_no_EBP() %{ 3822 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3823 match(RegP); 3824 match(eAXRegP); 3825 match(eBXRegP); 3826 match(eCXRegP); 3827 match(eDIRegP); 3828 3829 op_cost(100); 3830 format %{ %} 3831 interface(REG_INTER); 3832 %} 3833 3834 operand naxRegP() %{ 3835 constraint(ALLOC_IN_RC(nax_reg)); 3836 match(RegP); 3837 match(eBXRegP); 3838 match(eDXRegP); 3839 match(eCXRegP); 3840 match(eSIRegP); 3841 match(eDIRegP); 3842 3843 format %{ %} 3844 interface(REG_INTER); 3845 %} 3846 3847 operand nabxRegP() %{ 3848 constraint(ALLOC_IN_RC(nabx_reg)); 3849 match(RegP); 3850 match(eCXRegP); 3851 match(eDXRegP); 3852 match(eSIRegP); 3853 match(eDIRegP); 3854 3855 format %{ %} 3856 interface(REG_INTER); 3857 %} 3858 3859 operand pRegP() %{ 3860 constraint(ALLOC_IN_RC(p_reg)); 3861 match(RegP); 3862 match(eBXRegP); 3863 match(eDXRegP); 3864 match(eSIRegP); 3865 match(eDIRegP); 3866 3867 format %{ %} 3868 interface(REG_INTER); 3869 %} 3870 3871 // Special Registers 3872 // Return a pointer value 3873 operand eAXRegP(eRegP reg) %{ 3874 constraint(ALLOC_IN_RC(eax_reg)); 3875 match(reg); 3876 format %{ "EAX" %} 3877 interface(REG_INTER); 3878 %} 3879 3880 // Used in AtomicAdd 3881 operand eBXRegP(eRegP reg) %{ 3882 constraint(ALLOC_IN_RC(ebx_reg)); 3883 match(reg); 3884 format %{ "EBX" %} 3885 interface(REG_INTER); 3886 %} 3887 3888 // Tail-call (interprocedural jump) to interpreter 3889 operand eCXRegP(eRegP reg) %{ 3890 constraint(ALLOC_IN_RC(ecx_reg)); 3891 match(reg); 3892 format %{ "ECX" %} 3893 interface(REG_INTER); 3894 %} 3895 3896 operand eSIRegP(eRegP reg) %{ 3897 constraint(ALLOC_IN_RC(esi_reg)); 3898 match(reg); 3899 format %{ "ESI" %} 3900 interface(REG_INTER); 3901 %} 3902 3903 // Used in rep stosw 3904 operand eDIRegP(eRegP reg) %{ 3905 constraint(ALLOC_IN_RC(edi_reg)); 3906 match(reg); 3907 format %{ "EDI" %} 3908 interface(REG_INTER); 3909 %} 3910 3911 operand eRegL() %{ 
3912 constraint(ALLOC_IN_RC(long_reg)); 3913 match(RegL); 3914 match(eADXRegL); 3915 3916 format %{ %} 3917 interface(REG_INTER); 3918 %} 3919 3920 operand eADXRegL( eRegL reg ) %{ 3921 constraint(ALLOC_IN_RC(eadx_reg)); 3922 match(reg); 3923 3924 format %{ "EDX:EAX" %} 3925 interface(REG_INTER); 3926 %} 3927 3928 operand eBCXRegL( eRegL reg ) %{ 3929 constraint(ALLOC_IN_RC(ebcx_reg)); 3930 match(reg); 3931 3932 format %{ "EBX:ECX" %} 3933 interface(REG_INTER); 3934 %} 3935 3936 // Special case for integer high multiply 3937 operand eADXRegL_low_only() %{ 3938 constraint(ALLOC_IN_RC(eadx_reg)); 3939 match(RegL); 3940 3941 format %{ "EAX" %} 3942 interface(REG_INTER); 3943 %} 3944 3945 // Flags register, used as output of compare instructions 3946 operand eFlagsReg() %{ 3947 constraint(ALLOC_IN_RC(int_flags)); 3948 match(RegFlags); 3949 3950 format %{ "EFLAGS" %} 3951 interface(REG_INTER); 3952 %} 3953 3954 // Flags register, used as output of FLOATING POINT compare instructions 3955 operand eFlagsRegU() %{ 3956 constraint(ALLOC_IN_RC(int_flags)); 3957 match(RegFlags); 3958 3959 format %{ "EFLAGS_U" %} 3960 interface(REG_INTER); 3961 %} 3962 3963 operand eFlagsRegUCF() %{ 3964 constraint(ALLOC_IN_RC(int_flags)); 3965 match(RegFlags); 3966 predicate(false); 3967 3968 format %{ "EFLAGS_U_CF" %} 3969 interface(REG_INTER); 3970 %} 3971 3972 // Condition Code Register used by long compare 3973 operand flagsReg_long_LTGE() %{ 3974 constraint(ALLOC_IN_RC(int_flags)); 3975 match(RegFlags); 3976 format %{ "FLAGS_LTGE" %} 3977 interface(REG_INTER); 3978 %} 3979 operand flagsReg_long_EQNE() %{ 3980 constraint(ALLOC_IN_RC(int_flags)); 3981 match(RegFlags); 3982 format %{ "FLAGS_EQNE" %} 3983 interface(REG_INTER); 3984 %} 3985 operand flagsReg_long_LEGT() %{ 3986 constraint(ALLOC_IN_RC(int_flags)); 3987 match(RegFlags); 3988 format %{ "FLAGS_LEGT" %} 3989 interface(REG_INTER); 3990 %} 3991 3992 // Float register operands 3993 operand regDPR() %{ 3994 predicate( UseSSE < 2 ); 
3995 constraint(ALLOC_IN_RC(fp_dbl_reg)); 3996 match(RegD); 3997 match(regDPR1); 3998 match(regDPR2); 3999 format %{ %} 4000 interface(REG_INTER); 4001 %} 4002 4003 operand regDPR1(regDPR reg) %{ 4004 predicate( UseSSE < 2 ); 4005 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4006 match(reg); 4007 format %{ "FPR1" %} 4008 interface(REG_INTER); 4009 %} 4010 4011 operand regDPR2(regDPR reg) %{ 4012 predicate( UseSSE < 2 ); 4013 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4014 match(reg); 4015 format %{ "FPR2" %} 4016 interface(REG_INTER); 4017 %} 4018 4019 operand regnotDPR1(regDPR reg) %{ 4020 predicate( UseSSE < 2 ); 4021 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4022 match(reg); 4023 format %{ %} 4024 interface(REG_INTER); 4025 %} 4026 4027 // Float register operands 4028 operand regFPR() %{ 4029 predicate( UseSSE < 2 ); 4030 constraint(ALLOC_IN_RC(fp_flt_reg)); 4031 match(RegF); 4032 match(regFPR1); 4033 format %{ %} 4034 interface(REG_INTER); 4035 %} 4036 4037 // Float register operands 4038 operand regFPR1(regFPR reg) %{ 4039 predicate( UseSSE < 2 ); 4040 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4041 match(reg); 4042 format %{ "FPR1" %} 4043 interface(REG_INTER); 4044 %} 4045 4046 // XMM Float register operands 4047 operand regF() %{ 4048 predicate( UseSSE>=1 ); 4049 constraint(ALLOC_IN_RC(float_reg_legacy)); 4050 match(RegF); 4051 format %{ %} 4052 interface(REG_INTER); 4053 %} 4054 4055 // XMM Double register operands 4056 operand regD() %{ 4057 predicate( UseSSE>=2 ); 4058 constraint(ALLOC_IN_RC(double_reg_legacy)); 4059 match(RegD); 4060 format %{ %} 4061 interface(REG_INTER); 4062 %} 4063 4064 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM) 4065 // runtime code generation via reg_class_dynamic. 
4066 operand vecS() %{ 4067 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 4068 match(VecS); 4069 4070 format %{ %} 4071 interface(REG_INTER); 4072 %} 4073 4074 operand vecD() %{ 4075 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 4076 match(VecD); 4077 4078 format %{ %} 4079 interface(REG_INTER); 4080 %} 4081 4082 operand vecX() %{ 4083 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 4084 match(VecX); 4085 4086 format %{ %} 4087 interface(REG_INTER); 4088 %} 4089 4090 operand vecY() %{ 4091 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 4092 match(VecY); 4093 4094 format %{ %} 4095 interface(REG_INTER); 4096 %} 4097 4098 //----------Memory Operands---------------------------------------------------- 4099 // Direct Memory Operand 4100 operand direct(immP addr) %{ 4101 match(addr); 4102 4103 format %{ "[$addr]" %} 4104 interface(MEMORY_INTER) %{ 4105 base(0xFFFFFFFF); 4106 index(0x4); 4107 scale(0x0); 4108 disp($addr); 4109 %} 4110 %} 4111 4112 // Indirect Memory Operand 4113 operand indirect(eRegP reg) %{ 4114 constraint(ALLOC_IN_RC(int_reg)); 4115 match(reg); 4116 4117 format %{ "[$reg]" %} 4118 interface(MEMORY_INTER) %{ 4119 base($reg); 4120 index(0x4); 4121 scale(0x0); 4122 disp(0x0); 4123 %} 4124 %} 4125 4126 // Indirect Memory Plus Short Offset Operand 4127 operand indOffset8(eRegP reg, immI8 off) %{ 4128 match(AddP reg off); 4129 4130 format %{ "[$reg + $off]" %} 4131 interface(MEMORY_INTER) %{ 4132 base($reg); 4133 index(0x4); 4134 scale(0x0); 4135 disp($off); 4136 %} 4137 %} 4138 4139 // Indirect Memory Plus Long Offset Operand 4140 operand indOffset32(eRegP reg, immI off) %{ 4141 match(AddP reg off); 4142 4143 format %{ "[$reg + $off]" %} 4144 interface(MEMORY_INTER) %{ 4145 base($reg); 4146 index(0x4); 4147 scale(0x0); 4148 disp($off); 4149 %} 4150 %} 4151 4152 // Indirect Memory Plus Long Offset Operand 4153 operand indOffset32X(rRegI reg, immP off) %{ 4154 match(AddP off reg); 4155 4156 format %{ "[$reg + $off]" %} 4157 interface(MEMORY_INTER) %{ 4158 
base($reg); 4159 index(0x4); 4160 scale(0x0); 4161 disp($off); 4162 %} 4163 %} 4164 4165 // Indirect Memory Plus Index Register Plus Offset Operand 4166 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4167 match(AddP (AddP reg ireg) off); 4168 4169 op_cost(10); 4170 format %{"[$reg + $off + $ireg]" %} 4171 interface(MEMORY_INTER) %{ 4172 base($reg); 4173 index($ireg); 4174 scale(0x0); 4175 disp($off); 4176 %} 4177 %} 4178 4179 // Indirect Memory Plus Index Register Plus Offset Operand 4180 operand indIndex(eRegP reg, rRegI ireg) %{ 4181 match(AddP reg ireg); 4182 4183 op_cost(10); 4184 format %{"[$reg + $ireg]" %} 4185 interface(MEMORY_INTER) %{ 4186 base($reg); 4187 index($ireg); 4188 scale(0x0); 4189 disp(0x0); 4190 %} 4191 %} 4192 4193 // // ------------------------------------------------------------------------- 4194 // // 486 architecture doesn't support "scale * index + offset" with out a base 4195 // // ------------------------------------------------------------------------- 4196 // // Scaled Memory Operands 4197 // // Indirect Memory Times Scale Plus Offset Operand 4198 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4199 // match(AddP off (LShiftI ireg scale)); 4200 // 4201 // op_cost(10); 4202 // format %{"[$off + $ireg << $scale]" %} 4203 // interface(MEMORY_INTER) %{ 4204 // base(0x4); 4205 // index($ireg); 4206 // scale($scale); 4207 // disp($off); 4208 // %} 4209 // %} 4210 4211 // Indirect Memory Times Scale Plus Index Register 4212 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4213 match(AddP reg (LShiftI ireg scale)); 4214 4215 op_cost(10); 4216 format %{"[$reg + $ireg << $scale]" %} 4217 interface(MEMORY_INTER) %{ 4218 base($reg); 4219 index($ireg); 4220 scale($scale); 4221 disp(0x0); 4222 %} 4223 %} 4224 4225 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4226 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4227 match(AddP (AddP reg (LShiftI ireg 
scale)) off); 4228 4229 op_cost(10); 4230 format %{"[$reg + $off + $ireg << $scale]" %} 4231 interface(MEMORY_INTER) %{ 4232 base($reg); 4233 index($ireg); 4234 scale($scale); 4235 disp($off); 4236 %} 4237 %} 4238 4239 //----------Load Long Memory Operands------------------------------------------ 4240 // The load-long idiom will use it's address expression again after loading 4241 // the first word of the long. If the load-long destination overlaps with 4242 // registers used in the addressing expression, the 2nd half will be loaded 4243 // from a clobbered address. Fix this by requiring that load-long use 4244 // address registers that do not overlap with the load-long target. 4245 4246 // load-long support 4247 operand load_long_RegP() %{ 4248 constraint(ALLOC_IN_RC(esi_reg)); 4249 match(RegP); 4250 match(eSIRegP); 4251 op_cost(100); 4252 format %{ %} 4253 interface(REG_INTER); 4254 %} 4255 4256 // Indirect Memory Operand Long 4257 operand load_long_indirect(load_long_RegP reg) %{ 4258 constraint(ALLOC_IN_RC(esi_reg)); 4259 match(reg); 4260 4261 format %{ "[$reg]" %} 4262 interface(MEMORY_INTER) %{ 4263 base($reg); 4264 index(0x4); 4265 scale(0x0); 4266 disp(0x0); 4267 %} 4268 %} 4269 4270 // Indirect Memory Plus Long Offset Operand 4271 operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4272 match(AddP reg off); 4273 4274 format %{ "[$reg + $off]" %} 4275 interface(MEMORY_INTER) %{ 4276 base($reg); 4277 index(0x4); 4278 scale(0x0); 4279 disp($off); 4280 %} 4281 %} 4282 4283 opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4284 4285 4286 //----------Special Memory Operands-------------------------------------------- 4287 // Stack Slot Operand - This operand is used for loading and storing temporary 4288 // values on the stack where a match requires a value to 4289 // flow through memory. 
4290 operand stackSlotP(sRegP reg) %{ 4291 constraint(ALLOC_IN_RC(stack_slots)); 4292 // No match rule because this operand is only generated in matching 4293 format %{ "[$reg]" %} 4294 interface(MEMORY_INTER) %{ 4295 base(0x4); // ESP 4296 index(0x4); // No Index 4297 scale(0x0); // No Scale 4298 disp($reg); // Stack Offset 4299 %} 4300 %} 4301 4302 operand stackSlotI(sRegI reg) %{ 4303 constraint(ALLOC_IN_RC(stack_slots)); 4304 // No match rule because this operand is only generated in matching 4305 format %{ "[$reg]" %} 4306 interface(MEMORY_INTER) %{ 4307 base(0x4); // ESP 4308 index(0x4); // No Index 4309 scale(0x0); // No Scale 4310 disp($reg); // Stack Offset 4311 %} 4312 %} 4313 4314 operand stackSlotF(sRegF reg) %{ 4315 constraint(ALLOC_IN_RC(stack_slots)); 4316 // No match rule because this operand is only generated in matching 4317 format %{ "[$reg]" %} 4318 interface(MEMORY_INTER) %{ 4319 base(0x4); // ESP 4320 index(0x4); // No Index 4321 scale(0x0); // No Scale 4322 disp($reg); // Stack Offset 4323 %} 4324 %} 4325 4326 operand stackSlotD(sRegD reg) %{ 4327 constraint(ALLOC_IN_RC(stack_slots)); 4328 // No match rule because this operand is only generated in matching 4329 format %{ "[$reg]" %} 4330 interface(MEMORY_INTER) %{ 4331 base(0x4); // ESP 4332 index(0x4); // No Index 4333 scale(0x0); // No Scale 4334 disp($reg); // Stack Offset 4335 %} 4336 %} 4337 4338 operand stackSlotL(sRegL reg) %{ 4339 constraint(ALLOC_IN_RC(stack_slots)); 4340 // No match rule because this operand is only generated in matching 4341 format %{ "[$reg]" %} 4342 interface(MEMORY_INTER) %{ 4343 base(0x4); // ESP 4344 index(0x4); // No Index 4345 scale(0x0); // No Scale 4346 disp($reg); // Stack Offset 4347 %} 4348 %} 4349 4350 //----------Memory Operands - Win95 Implicit Null Variants---------------- 4351 // Indirect Memory Operand 4352 operand indirect_win95_safe(eRegP_no_EBP reg) 4353 %{ 4354 constraint(ALLOC_IN_RC(int_reg)); 4355 match(reg); 4356 4357 op_cost(100); 4358 
format %{ "[$reg]" %} 4359 interface(MEMORY_INTER) %{ 4360 base($reg); 4361 index(0x4); 4362 scale(0x0); 4363 disp(0x0); 4364 %} 4365 %} 4366 4367 // Indirect Memory Plus Short Offset Operand 4368 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off) 4369 %{ 4370 match(AddP reg off); 4371 4372 op_cost(100); 4373 format %{ "[$reg + $off]" %} 4374 interface(MEMORY_INTER) %{ 4375 base($reg); 4376 index(0x4); 4377 scale(0x0); 4378 disp($off); 4379 %} 4380 %} 4381 4382 // Indirect Memory Plus Long Offset Operand 4383 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off) 4384 %{ 4385 match(AddP reg off); 4386 4387 op_cost(100); 4388 format %{ "[$reg + $off]" %} 4389 interface(MEMORY_INTER) %{ 4390 base($reg); 4391 index(0x4); 4392 scale(0x0); 4393 disp($off); 4394 %} 4395 %} 4396 4397 // Indirect Memory Plus Index Register Plus Offset Operand 4398 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off) 4399 %{ 4400 match(AddP (AddP reg ireg) off); 4401 4402 op_cost(100); 4403 format %{"[$reg + $off + $ireg]" %} 4404 interface(MEMORY_INTER) %{ 4405 base($reg); 4406 index($ireg); 4407 scale(0x0); 4408 disp($off); 4409 %} 4410 %} 4411 4412 // Indirect Memory Times Scale Plus Index Register 4413 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale) 4414 %{ 4415 match(AddP reg (LShiftI ireg scale)); 4416 4417 op_cost(100); 4418 format %{"[$reg + $ireg << $scale]" %} 4419 interface(MEMORY_INTER) %{ 4420 base($reg); 4421 index($ireg); 4422 scale($scale); 4423 disp(0x0); 4424 %} 4425 %} 4426 4427 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4428 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale) 4429 %{ 4430 match(AddP (AddP reg (LShiftI ireg scale)) off); 4431 4432 op_cost(100); 4433 format %{"[$reg + $off + $ireg << $scale]" %} 4434 interface(MEMORY_INTER) %{ 4435 base($reg); 4436 index($ireg); 4437 scale($scale); 4438 disp($off); 4439 %} 4440 %} 4441 4442 
//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed compare).  The encodings are the x86 Jcc
// condition-code nibbles (0x4 = JE, 0x5 = JNE, 0xC = JL, ...).
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
// Unsigned comparison code: uses the below/above (carry-based) Jcc
// encodings (0x2 = JB, 0x3 = JNB/JAE, 0x6 = JBE, 0x7 = JNBE/JA).
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move.  The encodings are the FCMOVcc
// opcode bytes; overflow/no_overflow have no FCMOV form.
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares: each condition is replaced by its
// commuted form (l <-> g, le <-> ge) because the operand order was swapped.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
4586 pipeline %{ 4587 4588 //----------ATTRIBUTES--------------------------------------------------------- 4589 attributes %{ 4590 variable_size_instructions; // Fixed size instructions 4591 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle 4592 instruction_unit_size = 1; // An instruction is 1 bytes long 4593 instruction_fetch_unit_size = 16; // The processor fetches one line 4594 instruction_fetch_units = 1; // of 16 bytes 4595 4596 // List of nop instructions 4597 nops( MachNop ); 4598 %} 4599 4600 //----------RESOURCES---------------------------------------------------------- 4601 // Resources are the functional units available to the machine 4602 4603 // Generic P2/P3 pipeline 4604 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of 4605 // 3 instructions decoded per cycle. 4606 // 2 load/store ops per cycle, 1 branch, 1 FPU, 4607 // 2 ALU op, only ALU0 handles mul/div instructions. 4608 resources( D0, D1, D2, DECODE = D0 | D1 | D2, 4609 MS0, MS1, MEM = MS0 | MS1, 4610 BR, FPU, 4611 ALU0, ALU1, ALU = ALU0 | ALU1 ); 4612 4613 //----------PIPELINE DESCRIPTION----------------------------------------------- 4614 // Pipeline Description specifies the stages in the machine's pipeline 4615 4616 // Generic P2/P3 pipeline 4617 pipe_desc(S0, S1, S2, S3, S4, S5); 4618 4619 //----------PIPELINE CLASSES--------------------------------------------------- 4620 // Pipeline Classes describe the stages in which input and output are 4621 // referenced by the hardware pipeline. 4622 4623 // Naming convention: ialu or fpu 4624 // Then: _reg 4625 // Then: _reg if there is a 2nd register 4626 // Then: _long if it's a pair of instructions implementing a long 4627 // Then: _fat if it requires the big decoder 4628 // Or: _mem if it requires the big decoder and a memory unit. 
4629 4630 // Integer ALU reg operation 4631 pipe_class ialu_reg(rRegI dst) %{ 4632 single_instruction; 4633 dst : S4(write); 4634 dst : S3(read); 4635 DECODE : S0; // any decoder 4636 ALU : S3; // any alu 4637 %} 4638 4639 // Long ALU reg operation 4640 pipe_class ialu_reg_long(eRegL dst) %{ 4641 instruction_count(2); 4642 dst : S4(write); 4643 dst : S3(read); 4644 DECODE : S0(2); // any 2 decoders 4645 ALU : S3(2); // both alus 4646 %} 4647 4648 // Integer ALU reg operation using big decoder 4649 pipe_class ialu_reg_fat(rRegI dst) %{ 4650 single_instruction; 4651 dst : S4(write); 4652 dst : S3(read); 4653 D0 : S0; // big decoder only 4654 ALU : S3; // any alu 4655 %} 4656 4657 // Long ALU reg operation using big decoder 4658 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4659 instruction_count(2); 4660 dst : S4(write); 4661 dst : S3(read); 4662 D0 : S0(2); // big decoder only; twice 4663 ALU : S3(2); // any 2 alus 4664 %} 4665 4666 // Integer ALU reg-reg operation 4667 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4668 single_instruction; 4669 dst : S4(write); 4670 src : S3(read); 4671 DECODE : S0; // any decoder 4672 ALU : S3; // any alu 4673 %} 4674 4675 // Long ALU reg-reg operation 4676 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4677 instruction_count(2); 4678 dst : S4(write); 4679 src : S3(read); 4680 DECODE : S0(2); // any 2 decoders 4681 ALU : S3(2); // both alus 4682 %} 4683 4684 // Integer ALU reg-reg operation 4685 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4686 single_instruction; 4687 dst : S4(write); 4688 src : S3(read); 4689 D0 : S0; // big decoder only 4690 ALU : S3; // any alu 4691 %} 4692 4693 // Long ALU reg-reg operation 4694 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4695 instruction_count(2); 4696 dst : S4(write); 4697 src : S3(read); 4698 D0 : S0(2); // big decoder only; twice 4699 ALU : S3(2); // both alus 4700 %} 4701 4702 // Integer ALU reg-mem operation 4703 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4704 single_instruction; 4705 dst : S5(write); 4706 mem : S3(read); 4707 D0 : S0; // big decoder only 4708 ALU : S4; // any alu 4709 MEM : S3; // any mem 4710 %} 4711 4712 // Long ALU reg-mem operation 4713 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4714 instruction_count(2); 4715 dst : S5(write); 4716 mem : S3(read); 4717 D0 : S0(2); // big decoder only; twice 4718 ALU : S4(2); // any 2 alus 4719 MEM : S3(2); // both mems 4720 %} 4721 4722 // Integer mem operation (prefetch) 4723 pipe_class ialu_mem(memory mem) 4724 %{ 4725 single_instruction; 4726 mem : S3(read); 4727 D0 : S0; // big decoder only 4728 MEM : S3; // any mem 4729 %} 4730 4731 // Integer Store to Memory 4732 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4733 single_instruction; 4734 mem : S3(read); 4735 src : S5(read); 4736 D0 : S0; // big decoder only 4737 ALU : S4; // any alu 4738 MEM : S3; 4739 %} 4740 4741 // Long Store to Memory 4742 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4743 instruction_count(2); 4744 mem : S3(read); 4745 src : S5(read); 4746 D0 : S0(2); // big decoder only; twice 4747 ALU : S4(2); // any 2 alus 4748 MEM : S3(2); // Both mems 4749 %} 4750 4751 // Integer Store to Memory 4752 pipe_class ialu_mem_imm(memory mem) %{ 4753 single_instruction; 4754 mem : S3(read); 4755 D0 : S0; // big decoder only 4756 ALU : S4; // any alu 4757 MEM : S3; 4758 %} 4759 4760 // Integer ALU0 reg-reg operation 4761 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4762 single_instruction; 4763 dst : S4(write); 4764 src : S3(read); 4765 D0 : S0; // Big decoder only 4766 ALU0 : S3; // only alu0 4767 %} 4768 4769 // Integer ALU0 reg-mem operation 4770 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4771 single_instruction; 4772 dst : S5(write); 4773 mem : S3(read); 4774 D0 : S0; // big decoder only 4775 ALU0 : S4; // ALU0 only 4776 MEM : S3; // any mem 4777 %} 4778 4779 // Integer ALU reg-reg operation 4780 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4781 single_instruction; 4782 cr : S4(write); 4783 src1 : S3(read); 4784 src2 : S3(read); 4785 DECODE : S0; // any decoder 4786 ALU : S3; // any alu 4787 %} 4788 4789 // Integer ALU reg-imm operation 4790 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4791 single_instruction; 4792 cr : S4(write); 4793 src1 : S3(read); 4794 DECODE : S0; // any decoder 4795 ALU : S3; // any alu 4796 %} 4797 4798 // Integer ALU reg-mem operation 4799 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4800 single_instruction; 4801 cr : S4(write); 4802 src1 : S3(read); 4803 src2 : S3(read); 4804 D0 : S0; // big decoder only 4805 ALU : S4; // any alu 4806 MEM : S3; 4807 %} 4808 4809 // Conditional move reg-reg 4810 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4811 instruction_count(4); 4812 y : S4(read); 4813 q : S3(read); 4814 p : S3(read); 4815 DECODE : S0(4); // any decoder 4816 %} 4817 4818 // Conditional move reg-reg 4819 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4820 single_instruction; 4821 dst : S4(write); 4822 src : S3(read); 4823 cr : S3(read); 4824 DECODE : S0; // any decoder 4825 %} 4826 4827 // Conditional move reg-mem 4828 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4829 single_instruction; 4830 dst : S4(write); 4831 src : S3(read); 4832 cr : S3(read); 4833 DECODE : S0; // any decoder 4834 MEM : S3; 4835 %} 4836 4837 // Conditional move reg-reg long 4838 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4839 single_instruction; 4840 dst : S4(write); 4841 src : S3(read); 4842 cr : S3(read); 4843 DECODE : S0(2); // any 2 decoders 4844 %} 4845 4846 // Conditional move double reg-reg 4847 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4848 single_instruction; 4849 dst : S4(write); 4850 src : S3(read); 4851 cr : S3(read); 4852 DECODE : S0; // any decoder 4853 %} 4854 4855 // Float reg-reg operation 4856 pipe_class fpu_reg(regDPR 
dst) %{
  instruction_count(2);
  dst    : S3(read);
  DECODE : S0(2);  // any 2 decoders
  FPU    : S3;
%}

// The pipe_class declarations below describe pipeline resource usage for the
// x87 (FPU-stack) floating point forms: which stage each operand is read or
// written in, and which decoder/FPU/memory resources the pattern consumes.

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0(2);  // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
  instruction_count(3);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  DECODE : S0(3);  // any 3 decoders
  FPU    : S3(2);
%}

// Float reg-reg-reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S0(4);  // any 4 decoders
  FPU    : S3(2);
%}

// Float reg-mem-reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S1(3);  // any 3 decoders
  D0     : S0;     // Big decoder only
  FPU    : S3(2);
  MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
  instruction_count(2);
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;     // big decoder only
  DECODE : S1;     // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;     // any mem
%}

// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
  instruction_count(3);
  dst    : S5(write);
  src1   : S3(read);
  mem    : S3(read);
  D0     : S0;     // big decoder only
  DECODE : S1(2);  // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;     // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2); 4932 src : S5(read); 4933 mem : S3(read); 4934 DECODE : S0; // any decoder for FPU PUSH 4935 D0 : S1; // big decoder only 4936 FPU : S4; 4937 MEM : S3; // any mem 4938 %} 4939 4940 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4941 instruction_count(3); 4942 src1 : S3(read); 4943 src2 : S3(read); 4944 mem : S3(read); 4945 DECODE : S0(2); // any decoder for FPU PUSH 4946 D0 : S1; // big decoder only 4947 FPU : S4; 4948 MEM : S3; // any mem 4949 %} 4950 4951 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4952 instruction_count(3); 4953 src1 : S3(read); 4954 src2 : S3(read); 4955 mem : S4(read); 4956 DECODE : S0; // any decoder for FPU PUSH 4957 D0 : S0(2); // big decoder only 4958 FPU : S4; 4959 MEM : S3(2); // any mem 4960 %} 4961 4962 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4963 instruction_count(2); 4964 src1 : S3(read); 4965 dst : S4(read); 4966 D0 : S0(2); // big decoder only 4967 MEM : S3(2); // any mem 4968 %} 4969 4970 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4971 instruction_count(3); 4972 src1 : S3(read); 4973 src2 : S3(read); 4974 dst : S4(read); 4975 D0 : S0(3); // big decoder only 4976 FPU : S4; 4977 MEM : S3(3); // any mem 4978 %} 4979 4980 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4981 instruction_count(3); 4982 src1 : S4(read); 4983 mem : S4(read); 4984 DECODE : S0; // any decoder for FPU PUSH 4985 D0 : S0(2); // big decoder only 4986 FPU : S4; 4987 MEM : S3(2); // any mem 4988 %} 4989 4990 // Float load constant 4991 pipe_class fpu_reg_con(regDPR dst) %{ 4992 instruction_count(2); 4993 dst : S5(write); 4994 D0 : S0; // big decoder only for the load 4995 DECODE : S1; // any decoder for FPU POP 4996 FPU : S4; 4997 MEM : S3; // any mem 4998 %} 4999 5000 // Float load constant 5001 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5002 instruction_count(3); 5003 dst : S5(write); 5004 src : S3(read); 5005 D0 : S0; // big decoder only for 
the load 5006 DECODE : S1(2); // any decoder for FPU POP 5007 FPU : S4; 5008 MEM : S3; // any mem 5009 %} 5010 5011 // UnConditional branch 5012 pipe_class pipe_jmp( label labl ) %{ 5013 single_instruction; 5014 BR : S3; 5015 %} 5016 5017 // Conditional branch 5018 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5019 single_instruction; 5020 cr : S1(read); 5021 BR : S3; 5022 %} 5023 5024 // Allocation idiom 5025 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5026 instruction_count(1); force_serialization; 5027 fixed_latency(6); 5028 heap_ptr : S3(read); 5029 DECODE : S0(3); 5030 D0 : S2; 5031 MEM : S3; 5032 ALU : S3(2); 5033 dst : S5(write); 5034 BR : S5; 5035 %} 5036 5037 // Generic big/slow expanded idiom 5038 pipe_class pipe_slow( ) %{ 5039 instruction_count(10); multiple_bundles; force_serialization; 5040 fixed_latency(100); 5041 D0 : S0(2); 5042 MEM : S3(2); 5043 %} 5044 5045 // The real do-nothing guy 5046 pipe_class empty( ) %{ 5047 instruction_count(0); 5048 %} 5049 5050 // Define the class for the Nop node 5051 define %{ 5052 MachNop = empty; 5053 %} 5054 5055 %} 5056 5057 //----------INSTRUCTIONS------------------------------------------------------- 5058 // 5059 // match -- States which machine-independent subtree may be replaced 5060 // by this instruction. 5061 // ins_cost -- The estimated cost of this instruction is used by instruction 5062 // selection to identify a minimum cost tree of machine 5063 // instructions that matches a tree of machine-independent 5064 // instructions. 5065 // format -- A string providing the disassembly for this instruction. 5066 // The value of an instruction's operand may be inserted 5067 // by referring to it with a '$' prefix. 5068 // opcode -- Three instruction opcodes may be provided. These are referred 5069 // to within an encode class as $primary, $secondary, and $tertiary 5070 // respectively. 
The primary opcode is commonly used to 5071 // indicate the type of machine instruction, while secondary 5072 // and tertiary are often used for prefix options or addressing 5073 // modes. 5074 // ins_encode -- A list of encode classes with parameters. The encode class 5075 // name must have been defined in an 'enc_class' specification 5076 // in the encode section of the architecture description. 5077 5078 //----------BSWAP-Instruction-------------------------------------------------- 5079 instruct bytes_reverse_int(rRegI dst) %{ 5080 match(Set dst (ReverseBytesI dst)); 5081 5082 format %{ "BSWAP $dst" %} 5083 opcode(0x0F, 0xC8); 5084 ins_encode( OpcP, OpcSReg(dst) ); 5085 ins_pipe( ialu_reg ); 5086 %} 5087 5088 instruct bytes_reverse_long(eRegL dst) %{ 5089 match(Set dst (ReverseBytesL dst)); 5090 5091 format %{ "BSWAP $dst.lo\n\t" 5092 "BSWAP $dst.hi\n\t" 5093 "XCHG $dst.lo $dst.hi" %} 5094 5095 ins_cost(125); 5096 ins_encode( bswap_long_bytes(dst) ); 5097 ins_pipe( ialu_reg_reg); 5098 %} 5099 5100 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5101 match(Set dst (ReverseBytesUS dst)); 5102 effect(KILL cr); 5103 5104 format %{ "BSWAP $dst\n\t" 5105 "SHR $dst,16\n\t" %} 5106 ins_encode %{ 5107 __ bswapl($dst$$Register); 5108 __ shrl($dst$$Register, 16); 5109 %} 5110 ins_pipe( ialu_reg ); 5111 %} 5112 5113 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5114 match(Set dst (ReverseBytesS dst)); 5115 effect(KILL cr); 5116 5117 format %{ "BSWAP $dst\n\t" 5118 "SAR $dst,16\n\t" %} 5119 ins_encode %{ 5120 __ bswapl($dst$$Register); 5121 __ sarl($dst$$Register, 16); 5122 %} 5123 ins_pipe( ialu_reg ); 5124 %} 5125 5126 5127 //---------- Zeros Count Instructions ------------------------------------------ 5128 5129 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5130 predicate(UseCountLeadingZerosInstruction); 5131 match(Set dst (CountLeadingZerosI src)); 5132 effect(KILL cr); 5133 5134 format %{ "LZCNT $dst, $src\t# count 
leading zeros (int)" %} 5135 ins_encode %{ 5136 __ lzcntl($dst$$Register, $src$$Register); 5137 %} 5138 ins_pipe(ialu_reg); 5139 %} 5140 5141 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5142 predicate(!UseCountLeadingZerosInstruction); 5143 match(Set dst (CountLeadingZerosI src)); 5144 effect(KILL cr); 5145 5146 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5147 "JNZ skip\n\t" 5148 "MOV $dst, -1\n" 5149 "skip:\n\t" 5150 "NEG $dst\n\t" 5151 "ADD $dst, 31" %} 5152 ins_encode %{ 5153 Register Rdst = $dst$$Register; 5154 Register Rsrc = $src$$Register; 5155 Label skip; 5156 __ bsrl(Rdst, Rsrc); 5157 __ jccb(Assembler::notZero, skip); 5158 __ movl(Rdst, -1); 5159 __ bind(skip); 5160 __ negl(Rdst); 5161 __ addl(Rdst, BitsPerInt - 1); 5162 %} 5163 ins_pipe(ialu_reg); 5164 %} 5165 5166 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5167 predicate(UseCountLeadingZerosInstruction); 5168 match(Set dst (CountLeadingZerosL src)); 5169 effect(TEMP dst, KILL cr); 5170 5171 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5172 "JNC done\n\t" 5173 "LZCNT $dst, $src.lo\n\t" 5174 "ADD $dst, 32\n" 5175 "done:" %} 5176 ins_encode %{ 5177 Register Rdst = $dst$$Register; 5178 Register Rsrc = $src$$Register; 5179 Label done; 5180 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5181 __ jccb(Assembler::carryClear, done); 5182 __ lzcntl(Rdst, Rsrc); 5183 __ addl(Rdst, BitsPerInt); 5184 __ bind(done); 5185 %} 5186 ins_pipe(ialu_reg); 5187 %} 5188 5189 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5190 predicate(!UseCountLeadingZerosInstruction); 5191 match(Set dst (CountLeadingZerosL src)); 5192 effect(TEMP dst, KILL cr); 5193 5194 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5195 "JZ msw_is_zero\n\t" 5196 "ADD $dst, 32\n\t" 5197 "JMP not_zero\n" 5198 "msw_is_zero:\n\t" 5199 "BSR $dst, $src.lo\n\t" 5200 "JNZ not_zero\n\t" 5201 "MOV $dst, -1\n" 5202 "not_zero:\n\t" 5203 "NEG 
$dst\n\t" 5204 "ADD $dst, 63\n" %} 5205 ins_encode %{ 5206 Register Rdst = $dst$$Register; 5207 Register Rsrc = $src$$Register; 5208 Label msw_is_zero; 5209 Label not_zero; 5210 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5211 __ jccb(Assembler::zero, msw_is_zero); 5212 __ addl(Rdst, BitsPerInt); 5213 __ jmpb(not_zero); 5214 __ bind(msw_is_zero); 5215 __ bsrl(Rdst, Rsrc); 5216 __ jccb(Assembler::notZero, not_zero); 5217 __ movl(Rdst, -1); 5218 __ bind(not_zero); 5219 __ negl(Rdst); 5220 __ addl(Rdst, BitsPerLong - 1); 5221 %} 5222 ins_pipe(ialu_reg); 5223 %} 5224 5225 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5226 predicate(UseCountTrailingZerosInstruction); 5227 match(Set dst (CountTrailingZerosI src)); 5228 effect(KILL cr); 5229 5230 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5231 ins_encode %{ 5232 __ tzcntl($dst$$Register, $src$$Register); 5233 %} 5234 ins_pipe(ialu_reg); 5235 %} 5236 5237 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5238 predicate(!UseCountTrailingZerosInstruction); 5239 match(Set dst (CountTrailingZerosI src)); 5240 effect(KILL cr); 5241 5242 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5243 "JNZ done\n\t" 5244 "MOV $dst, 32\n" 5245 "done:" %} 5246 ins_encode %{ 5247 Register Rdst = $dst$$Register; 5248 Label done; 5249 __ bsfl(Rdst, $src$$Register); 5250 __ jccb(Assembler::notZero, done); 5251 __ movl(Rdst, BitsPerInt); 5252 __ bind(done); 5253 %} 5254 ins_pipe(ialu_reg); 5255 %} 5256 5257 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5258 predicate(UseCountTrailingZerosInstruction); 5259 match(Set dst (CountTrailingZerosL src)); 5260 effect(TEMP dst, KILL cr); 5261 5262 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5263 "JNC done\n\t" 5264 "TZCNT $dst, $src.hi\n\t" 5265 "ADD $dst, 32\n" 5266 "done:" %} 5267 ins_encode %{ 5268 Register Rdst = $dst$$Register; 5269 Register Rsrc = $src$$Register; 5270 Label done; 5271 __ 
tzcntl(Rdst, Rsrc); 5272 __ jccb(Assembler::carryClear, done); 5273 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5274 __ addl(Rdst, BitsPerInt); 5275 __ bind(done); 5276 %} 5277 ins_pipe(ialu_reg); 5278 %} 5279 5280 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5281 predicate(!UseCountTrailingZerosInstruction); 5282 match(Set dst (CountTrailingZerosL src)); 5283 effect(TEMP dst, KILL cr); 5284 5285 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5286 "JNZ done\n\t" 5287 "BSF $dst, $src.hi\n\t" 5288 "JNZ msw_not_zero\n\t" 5289 "MOV $dst, 32\n" 5290 "msw_not_zero:\n\t" 5291 "ADD $dst, 32\n" 5292 "done:" %} 5293 ins_encode %{ 5294 Register Rdst = $dst$$Register; 5295 Register Rsrc = $src$$Register; 5296 Label msw_not_zero; 5297 Label done; 5298 __ bsfl(Rdst, Rsrc); 5299 __ jccb(Assembler::notZero, done); 5300 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5301 __ jccb(Assembler::notZero, msw_not_zero); 5302 __ movl(Rdst, BitsPerInt); 5303 __ bind(msw_not_zero); 5304 __ addl(Rdst, BitsPerInt); 5305 __ bind(done); 5306 %} 5307 ins_pipe(ialu_reg); 5308 %} 5309 5310 5311 //---------- Population Count Instructions ------------------------------------- 5312 5313 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5314 predicate(UsePopCountInstruction); 5315 match(Set dst (PopCountI src)); 5316 effect(KILL cr); 5317 5318 format %{ "POPCNT $dst, $src" %} 5319 ins_encode %{ 5320 __ popcntl($dst$$Register, $src$$Register); 5321 %} 5322 ins_pipe(ialu_reg); 5323 %} 5324 5325 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5326 predicate(UsePopCountInstruction); 5327 match(Set dst (PopCountI (LoadI mem))); 5328 effect(KILL cr); 5329 5330 format %{ "POPCNT $dst, $mem" %} 5331 ins_encode %{ 5332 __ popcntl($dst$$Register, $mem$$Address); 5333 %} 5334 ins_pipe(ialu_reg); 5335 %} 5336 5337 // Note: Long.bitCount(long) returns an int. 
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  // tmp holds the high-half count; dst is TEMP so it may not alias src.
  effect(KILL cr, TEMP tmp, TEMP dst);

  // 64-bit bit count on a 32-bit VM: POPCNT each half and add them.
  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  // POPCNT the two 32-bit halves straight from memory ($mem and $mem+4) and sum.
  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Build the low-word and high-word (disp+4) addresses explicitly.
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  // Sign-extend the byte to 32 bits, then replicate the sign into the high word.
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // Shifting by 7 suffices (not 31): after MOVSX8 the 24+1 MSB already hold
    // the sign bit, so 7 more positions smear it across the whole word.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  // Zero-extend: load the byte zero-extended, then clear the high word.
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 bits of the mask can matter after a zero-extending
    // byte load, so the immediate is clipped with right_n_bits(8).
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI
dst, memory mem) %{ 5460 match(Set dst (LoadS mem)); 5461 5462 ins_cost(125); 5463 format %{ "MOVSX $dst,$mem\t# short" %} 5464 5465 ins_encode %{ 5466 __ movswl($dst$$Register, $mem$$Address); 5467 %} 5468 5469 ins_pipe(ialu_reg_mem); 5470 %} 5471 5472 // Load Short (16 bit signed) to Byte (8 bit signed) 5473 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5474 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5475 5476 ins_cost(125); 5477 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5478 ins_encode %{ 5479 __ movsbl($dst$$Register, $mem$$Address); 5480 %} 5481 ins_pipe(ialu_reg_mem); 5482 %} 5483 5484 // Load Short (16bit signed) into Long Register 5485 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5486 match(Set dst (ConvI2L (LoadS mem))); 5487 effect(KILL cr); 5488 5489 ins_cost(375); 5490 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5491 "MOV $dst.hi,$dst.lo\n\t" 5492 "SAR $dst.hi,15" %} 5493 5494 ins_encode %{ 5495 __ movswl($dst$$Register, $mem$$Address); 5496 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5497 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5498 %} 5499 5500 ins_pipe(ialu_reg_mem); 5501 %} 5502 5503 // Load Unsigned Short/Char (16bit unsigned) 5504 instruct loadUS(rRegI dst, memory mem) %{ 5505 match(Set dst (LoadUS mem)); 5506 5507 ins_cost(125); 5508 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5509 5510 ins_encode %{ 5511 __ movzwl($dst$$Register, $mem$$Address); 5512 %} 5513 5514 ins_pipe(ialu_reg_mem); 5515 %} 5516 5517 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5518 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5519 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5520 5521 ins_cost(125); 5522 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5523 ins_encode %{ 5524 __ movsbl($dst$$Register, $mem$$Address); 5525 %} 5526 ins_pipe(ialu_reg_mem); 5527 %} 5528 5529 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5530 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5531 match(Set dst (ConvI2L (LoadUS mem))); 5532 effect(KILL cr); 5533 5534 ins_cost(250); 5535 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5536 "XOR $dst.hi,$dst.hi" %} 5537 5538 ins_encode %{ 5539 __ movzwl($dst$$Register, $mem$$Address); 5540 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5541 %} 5542 5543 ins_pipe(ialu_reg_mem); 5544 %} 5545 5546 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5547 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5548 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5549 effect(KILL cr); 5550 5551 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5552 "XOR $dst.hi,$dst.hi" %} 5553 ins_encode %{ 5554 Register Rdst = $dst$$Register; 5555 __ movzbl(Rdst, $mem$$Address); 5556 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5557 %} 5558 ins_pipe(ialu_reg_mem); 5559 %} 5560 5561 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5562 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5563 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5564 effect(KILL cr); 5565 5566 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5567 "XOR $dst.hi,$dst.hi\n\t" 5568 "AND $dst.lo,right_n_bits($mask, 16)" %} 5569 ins_encode %{ 5570 Register Rdst = $dst$$Register; 5571 __ movzwl(Rdst, $mem$$Address); 5572 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5573 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5574 %} 5575 ins_pipe(ialu_reg_mem); 5576 %} 5577 5578 // Load Integer 5579 instruct loadI(rRegI dst, memory mem) %{ 5580 match(Set dst (LoadI mem)); 5581 5582 ins_cost(125); 5583 format %{ "MOV $dst,$mem\t# int" %} 5584 5585 ins_encode %{ 5586 __ movl($dst$$Register, $mem$$Address); 5587 %} 5588 5589 ins_pipe(ialu_reg_mem); 5590 %} 5591 5592 // Load Integer (32 bit signed) to Byte (8 bit signed) 5593 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5594 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5595 5596 ins_cost(125); 5597 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5598 ins_encode %{ 5599 __ movsbl($dst$$Register, $mem$$Address); 5600 %} 5601 ins_pipe(ialu_reg_mem); 5602 %} 5603 5604 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5605 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5606 match(Set dst (AndI (LoadI mem) mask)); 5607 5608 ins_cost(125); 5609 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5610 ins_encode %{ 5611 __ movzbl($dst$$Register, $mem$$Address); 5612 %} 5613 ins_pipe(ialu_reg_mem); 5614 %} 5615 5616 // Load Integer (32 bit signed) to Short (16 bit signed) 5617 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5618 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5619 5620 ins_cost(125); 5621 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5622 ins_encode %{ 5623 __ movswl($dst$$Register, $mem$$Address); 5624 %} 5625 ins_pipe(ialu_reg_mem); 5626 
%} 5627 5628 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5629 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5630 match(Set dst (AndI (LoadI mem) mask)); 5631 5632 ins_cost(125); 5633 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5634 ins_encode %{ 5635 __ movzwl($dst$$Register, $mem$$Address); 5636 %} 5637 ins_pipe(ialu_reg_mem); 5638 %} 5639 5640 // Load Integer into Long Register 5641 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5642 match(Set dst (ConvI2L (LoadI mem))); 5643 effect(KILL cr); 5644 5645 ins_cost(375); 5646 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5647 "MOV $dst.hi,$dst.lo\n\t" 5648 "SAR $dst.hi,31" %} 5649 5650 ins_encode %{ 5651 __ movl($dst$$Register, $mem$$Address); 5652 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5653 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5654 %} 5655 5656 ins_pipe(ialu_reg_mem); 5657 %} 5658 5659 // Load Integer with mask 0xFF into Long Register 5660 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5661 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5662 effect(KILL cr); 5663 5664 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5665 "XOR $dst.hi,$dst.hi" %} 5666 ins_encode %{ 5667 Register Rdst = $dst$$Register; 5668 __ movzbl(Rdst, $mem$$Address); 5669 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5670 %} 5671 ins_pipe(ialu_reg_mem); 5672 %} 5673 5674 // Load Integer with mask 0xFFFF into Long Register 5675 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5676 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5677 effect(KILL cr); 5678 5679 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5680 "XOR $dst.hi,$dst.hi" %} 5681 ins_encode %{ 5682 Register Rdst = $dst$$Register; 5683 __ movzwl(Rdst, $mem$$Address); 5684 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5685 %} 5686 ins_pipe(ialu_reg_mem); 
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  // A 31-bit (immU31) mask clears bit 31, so the masked int is non-negative
  // and the long's high word is simply zero.
  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  // (int & 0xFFFFFFFFL) == zero-extension: matched via AndL with the
  // immL_32bits mask operand, so no AND needs to be emitted.
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  // Non-atomic path only: two separate 32-bit loads (see the _volatile
  // variants below for the atomic case).
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Low word at $mem, high word at $mem+4 (little-endian long halves).
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
5746 instruct loadL_volatile(stackSlotL dst, memory mem) %{ 5747 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 5748 match(Set dst (LoadL mem)); 5749 5750 ins_cost(200); 5751 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 5752 "FISTp $dst" %} 5753 ins_encode(enc_loadL_volatile(mem,dst)); 5754 ins_pipe( fpu_reg_mem ); 5755 %} 5756 5757 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 5758 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5759 match(Set dst (LoadL mem)); 5760 effect(TEMP tmp); 5761 ins_cost(180); 5762 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5763 "MOVSD $dst,$tmp" %} 5764 ins_encode %{ 5765 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5766 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 5767 %} 5768 ins_pipe( pipe_slow ); 5769 %} 5770 5771 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 5772 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5773 match(Set dst (LoadL mem)); 5774 effect(TEMP tmp); 5775 ins_cost(160); 5776 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5777 "MOVD $dst.lo,$tmp\n\t" 5778 "PSRLQ $tmp,32\n\t" 5779 "MOVD $dst.hi,$tmp" %} 5780 ins_encode %{ 5781 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5782 __ movdl($dst$$Register, $tmp$$XMMRegister); 5783 __ psrlq($tmp$$XMMRegister, 32); 5784 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 5785 %} 5786 ins_pipe( pipe_slow ); 5787 %} 5788 5789 // Load Range 5790 instruct loadRange(rRegI dst, memory mem) %{ 5791 match(Set dst (LoadRange mem)); 5792 5793 ins_cost(125); 5794 format %{ "MOV $dst,$mem" %} 5795 opcode(0x8B); 5796 ins_encode( OpcP, RegMem(dst,mem)); 5797 ins_pipe( ialu_reg_mem ); 5798 %} 5799 5800 5801 // Load Pointer 5802 instruct loadP(eRegP dst, memory mem) %{ 5803 match(Set dst (LoadP mem)); 5804 5805 ins_cost(125); 5806 format %{ "MOV $dst,$mem" %} 5807 opcode(0x8B); 5808 ins_encode( OpcP, RegMem(dst,mem)); 5809 ins_pipe( 
ialu_reg_mem );  // tail of an instruct that begins before this chunk
%}

// Load Klass Pointer
// Plain 32-bit MOV from memory into a pointer register (opcode 8B /r).
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double
// x87 path (UseSSE<=1): FLD pushes the 64-bit value onto the FPU stack,
// FSTP pops it into the register-allocated FPU slot.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);  /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
// MOVSD variant; selected when the platform prefers the load that also
// clears the upper half of the XMM register (see the predicate).
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Complement of loadD: chosen when !UseXmmLoadAndClearUpper (MOVLPD form
// in the mnemonic; the emitted helper is the same movdbl).
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float
// x87 path (UseSSE==0): FLD m32real then FSTP into the allocated slot.
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);  /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
// One LEA instruct per addressing-mode operand so the matcher can fold
// each address shape directly (opcode 8D /r in every case).
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR reg,reg is shorter than MOV reg,0 but clobbers EFLAGS, hence KILL cr.
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);  /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// 64-bit constant: materialized as two 32-bit immediate moves (lo then hi).
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Long zero: XOR both halves (clobbers EFLAGS, hence KILL cr).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
// x87 float constant: load from the constant table, pop into the slot.
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
// FLDZ pushes +0.0 directly; no constant-table access needed.
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
// FLD1 pushes +1.0 directly; no constant-table access needed.
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// SSE float constant: MOVSS from the shared constant table.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// Zero is cheaper as a register self-XOR than a constant-table load.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
// x87 double constant from the constant table.
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
// FLDZ pushes +0.0 without touching the constant table.
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
// FLD1 pushes +1.0 without touching the constant table.
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// SSE2 double constant: MOVSD from the shared constant table.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// Zero via register self-XOR instead of a constant-table load.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Long spill reload: two 32-bit loads, lo then hi word.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);  /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);  /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// The AllocatePrefetchInstr flag selects exactly one variant below.

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// 0x66 operand-size prefix turns the 32-bit MOV into a 16-bit store.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Non-atomic path only: the predicate excludes stores that require
// atomic access (those are handled by the *_volatile forms below).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 atomic variant: bounce the spilled long through an XMM temp
// so the 64-bit store happens in a single MOVSD.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 atomic variant from a register pair: assemble lo/hi halves in
// two XMM temps, combine with PUNPCKLDQ, store with one MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);  /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);  /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);  /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);  /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
// Same byte-store encoding as storeImmB, but matches the StoreCM node.
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);  /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double
// x87 path (UseSSE<=1); src is pinned to the FPU top-of-stack (regDPR1).
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);  /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);  /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float
// x87 path (UseSSE==0); src is pinned to the FPU top-of-stack (regFPR1).
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);  /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);  /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// Double-to-float conversion is folded into the 32-bit FST_S store.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);  /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
6503 instruct storeFPR_imm( memory mem, immFPR src) %{ 6504 match(Set mem (StoreF mem src)); 6505 6506 ins_cost(50); 6507 format %{ "MOV $mem,$src\t# store float" %} 6508 opcode(0xC7); /* C7 /0 */ 6509 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src )); 6510 ins_pipe( ialu_mem_imm ); 6511 %} 6512 6513 // Store immediate Float value (it is faster than store from XMM register) 6514 // The instruction usage is guarded by predicate in operand immF(). 6515 instruct storeF_imm( memory mem, immF src) %{ 6516 match(Set mem (StoreF mem src)); 6517 6518 ins_cost(50); 6519 format %{ "MOV $mem,$src\t# store float" %} 6520 opcode(0xC7); /* C7 /0 */ 6521 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src )); 6522 ins_pipe( ialu_mem_imm ); 6523 %} 6524 6525 // Store Integer to stack slot 6526 instruct storeSSI(stackSlotI dst, rRegI src) %{ 6527 match(Set dst src); 6528 6529 ins_cost(100); 6530 format %{ "MOV $dst,$src" %} 6531 opcode(0x89); 6532 ins_encode( OpcPRegSS( dst, src ) ); 6533 ins_pipe( ialu_mem_reg ); 6534 %} 6535 6536 // Store Integer to stack slot 6537 instruct storeSSP(stackSlotP dst, eRegP src) %{ 6538 match(Set dst src); 6539 6540 ins_cost(100); 6541 format %{ "MOV $dst,$src" %} 6542 opcode(0x89); 6543 ins_encode( OpcPRegSS( dst, src ) ); 6544 ins_pipe( ialu_mem_reg ); 6545 %} 6546 6547 // Store Long to stack slot 6548 instruct storeSSL(stackSlotL dst, eRegL src) %{ 6549 match(Set dst src); 6550 6551 ins_cost(200); 6552 format %{ "MOV $dst,$src.lo\n\t" 6553 "MOV $dst+4,$src.hi" %} 6554 opcode(0x89, 0x89); 6555 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 6556 ins_pipe( ialu_mem_long_reg ); 6557 %} 6558 6559 //----------MemBar Instructions----------------------------------------------- 6560 // Memory barrier flavors 6561 6562 instruct membar_acquire() %{ 6563 match(MemBarAcquire); 6564 match(LoadFence); 6565 ins_cost(400); 6566 6567 size(0); 6568 format %{ "MEMBAR-acquire ! 
(empty encoding)" %} 6569 ins_encode(); 6570 ins_pipe(empty); 6571 %} 6572 6573 instruct membar_acquire_lock() %{ 6574 match(MemBarAcquireLock); 6575 ins_cost(0); 6576 6577 size(0); 6578 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %} 6579 ins_encode( ); 6580 ins_pipe(empty); 6581 %} 6582 6583 instruct membar_release() %{ 6584 match(MemBarRelease); 6585 match(StoreFence); 6586 ins_cost(400); 6587 6588 size(0); 6589 format %{ "MEMBAR-release ! (empty encoding)" %} 6590 ins_encode( ); 6591 ins_pipe(empty); 6592 %} 6593 6594 instruct membar_release_lock() %{ 6595 match(MemBarReleaseLock); 6596 ins_cost(0); 6597 6598 size(0); 6599 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %} 6600 ins_encode( ); 6601 ins_pipe(empty); 6602 %} 6603 6604 instruct membar_volatile(eFlagsReg cr) %{ 6605 match(MemBarVolatile); 6606 effect(KILL cr); 6607 ins_cost(400); 6608 6609 format %{ 6610 $$template 6611 if (os::is_MP()) { 6612 $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile" 6613 } else { 6614 $$emit$$"MEMBAR-volatile ! 
(empty encoding)" 6615 } 6616 %} 6617 ins_encode %{ 6618 __ membar(Assembler::StoreLoad); 6619 %} 6620 ins_pipe(pipe_slow); 6621 %} 6622 6623 instruct unnecessary_membar_volatile() %{ 6624 match(MemBarVolatile); 6625 predicate(Matcher::post_store_load_barrier(n)); 6626 ins_cost(0); 6627 6628 size(0); 6629 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} 6630 ins_encode( ); 6631 ins_pipe(empty); 6632 %} 6633 6634 instruct membar_storestore() %{ 6635 match(MemBarStoreStore); 6636 ins_cost(0); 6637 6638 size(0); 6639 format %{ "MEMBAR-storestore (empty encoding)" %} 6640 ins_encode( ); 6641 ins_pipe(empty); 6642 %} 6643 6644 //----------Move Instructions-------------------------------------------------- 6645 instruct castX2P(eAXRegP dst, eAXRegI src) %{ 6646 match(Set dst (CastX2P src)); 6647 format %{ "# X2P $dst, $src" %} 6648 ins_encode( /*empty encoding*/ ); 6649 ins_cost(0); 6650 ins_pipe(empty); 6651 %} 6652 6653 instruct castP2X(rRegI dst, eRegP src ) %{ 6654 match(Set dst (CastP2X src)); 6655 ins_cost(50); 6656 format %{ "MOV $dst, $src\t# CastP2X" %} 6657 ins_encode( enc_Copy( dst, src) ); 6658 ins_pipe( ialu_reg_reg ); 6659 %} 6660 6661 //----------Conditional Move--------------------------------------------------- 6662 // Conditional move 6663 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ 6664 predicate(!VM_Version::supports_cmov() ); 6665 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6666 ins_cost(200); 6667 format %{ "J$cop,us skip\t# signed cmove\n\t" 6668 "MOV $dst,$src\n" 6669 "skip:" %} 6670 ins_encode %{ 6671 Label Lskip; 6672 // Invert sense of branch from sense of CMOV 6673 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6674 __ movl($dst$$Register, $src$$Register); 6675 __ bind(Lskip); 6676 %} 6677 ins_pipe( pipe_cmov_reg ); 6678 %} 6679 6680 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ 6681 predicate(!VM_Version::supports_cmov() ); 6682 match(Set dst (CMoveI 
(Binary cop cr) (Binary dst src))); 6683 ins_cost(200); 6684 format %{ "J$cop,us skip\t# unsigned cmove\n\t" 6685 "MOV $dst,$src\n" 6686 "skip:" %} 6687 ins_encode %{ 6688 Label Lskip; 6689 // Invert sense of branch from sense of CMOV 6690 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6691 __ movl($dst$$Register, $src$$Register); 6692 __ bind(Lskip); 6693 %} 6694 ins_pipe( pipe_cmov_reg ); 6695 %} 6696 6697 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ 6698 predicate(VM_Version::supports_cmov() ); 6699 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6700 ins_cost(200); 6701 format %{ "CMOV$cop $dst,$src" %} 6702 opcode(0x0F,0x40); 6703 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6704 ins_pipe( pipe_cmov_reg ); 6705 %} 6706 6707 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ 6708 predicate(VM_Version::supports_cmov() ); 6709 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6710 ins_cost(200); 6711 format %{ "CMOV$cop $dst,$src" %} 6712 opcode(0x0F,0x40); 6713 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6714 ins_pipe( pipe_cmov_reg ); 6715 %} 6716 6717 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ 6718 predicate(VM_Version::supports_cmov() ); 6719 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6720 ins_cost(200); 6721 expand %{ 6722 cmovI_regU(cop, cr, dst, src); 6723 %} 6724 %} 6725 6726 // Conditional move 6727 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{ 6728 predicate(VM_Version::supports_cmov() ); 6729 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6730 ins_cost(250); 6731 format %{ "CMOV$cop $dst,$src" %} 6732 opcode(0x0F,0x40); 6733 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6734 ins_pipe( pipe_cmov_mem ); 6735 %} 6736 6737 // Conditional move 6738 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ 6739 predicate(VM_Version::supports_cmov() ); 6740 
match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6741 ins_cost(250); 6742 format %{ "CMOV$cop $dst,$src" %} 6743 opcode(0x0F,0x40); 6744 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6745 ins_pipe( pipe_cmov_mem ); 6746 %} 6747 6748 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ 6749 predicate(VM_Version::supports_cmov() ); 6750 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6751 ins_cost(250); 6752 expand %{ 6753 cmovI_memU(cop, cr, dst, src); 6754 %} 6755 %} 6756 6757 // Conditional move 6758 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6759 predicate(VM_Version::supports_cmov() ); 6760 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6761 ins_cost(200); 6762 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6763 opcode(0x0F,0x40); 6764 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6765 ins_pipe( pipe_cmov_reg ); 6766 %} 6767 6768 // Conditional move (non-P6 version) 6769 // Note: a CMoveP is generated for stubs and native wrappers 6770 // regardless of whether we are on a P6, so we 6771 // emulate a cmov here 6772 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6773 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6774 ins_cost(300); 6775 format %{ "Jn$cop skip\n\t" 6776 "MOV $dst,$src\t# pointer\n" 6777 "skip:" %} 6778 opcode(0x8b); 6779 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); 6780 ins_pipe( pipe_cmov_reg ); 6781 %} 6782 6783 // Conditional move 6784 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ 6785 predicate(VM_Version::supports_cmov() ); 6786 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6787 ins_cost(200); 6788 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6789 opcode(0x0F,0x40); 6790 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6791 ins_pipe( pipe_cmov_reg ); 6792 %} 6793 6794 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ 6795 
predicate(VM_Version::supports_cmov() ); 6796 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6797 ins_cost(200); 6798 expand %{ 6799 cmovP_regU(cop, cr, dst, src); 6800 %} 6801 %} 6802 6803 // DISABLED: Requires the ADLC to emit a bottom_type call that 6804 // correctly meets the two pointer arguments; one is an incoming 6805 // register but the other is a memory operand. ALSO appears to 6806 // be buggy with implicit null checks. 6807 // 6808 //// Conditional move 6809 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{ 6810 // predicate(VM_Version::supports_cmov() ); 6811 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6812 // ins_cost(250); 6813 // format %{ "CMOV$cop $dst,$src\t# ptr" %} 6814 // opcode(0x0F,0x40); 6815 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6816 // ins_pipe( pipe_cmov_mem ); 6817 //%} 6818 // 6819 //// Conditional move 6820 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{ 6821 // predicate(VM_Version::supports_cmov() ); 6822 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6823 // ins_cost(250); 6824 // format %{ "CMOV$cop $dst,$src\t# ptr" %} 6825 // opcode(0x0F,0x40); 6826 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6827 // ins_pipe( pipe_cmov_mem ); 6828 //%} 6829 6830 // Conditional move 6831 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ 6832 predicate(UseSSE<=1); 6833 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6834 ins_cost(200); 6835 format %{ "FCMOV$cop $dst,$src\t# double" %} 6836 opcode(0xDA); 6837 ins_encode( enc_cmov_dpr(cop,src) ); 6838 ins_pipe( pipe_cmovDPR_reg ); 6839 %} 6840 6841 // Conditional move 6842 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ 6843 predicate(UseSSE==0); 6844 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6845 ins_cost(200); 6846 format %{ "FCMOV$cop $dst,$src\t# float" %} 6847 opcode(0xDA); 
6848 ins_encode( enc_cmov_dpr(cop,src) ); 6849 ins_pipe( pipe_cmovDPR_reg ); 6850 %} 6851 6852 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6853 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ 6854 predicate(UseSSE<=1); 6855 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6856 ins_cost(200); 6857 format %{ "Jn$cop skip\n\t" 6858 "MOV $dst,$src\t# double\n" 6859 "skip:" %} 6860 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6861 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); 6862 ins_pipe( pipe_cmovDPR_reg ); 6863 %} 6864 6865 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6866 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ 6867 predicate(UseSSE==0); 6868 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6869 ins_cost(200); 6870 format %{ "Jn$cop skip\n\t" 6871 "MOV $dst,$src\t# float\n" 6872 "skip:" %} 6873 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6874 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); 6875 ins_pipe( pipe_cmovDPR_reg ); 6876 %} 6877 6878 // No CMOVE with SSE/SSE2 6879 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 6880 predicate (UseSSE>=1); 6881 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6882 ins_cost(200); 6883 format %{ "Jn$cop skip\n\t" 6884 "MOVSS $dst,$src\t# float\n" 6885 "skip:" %} 6886 ins_encode %{ 6887 Label skip; 6888 // Invert sense of branch from sense of CMOV 6889 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6890 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6891 __ bind(skip); 6892 %} 6893 ins_pipe( pipe_slow ); 6894 %} 6895 6896 // No CMOVE with SSE/SSE2 6897 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 6898 predicate (UseSSE>=2); 6899 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6900 ins_cost(200); 6901 format %{ "Jn$cop skip\n\t" 6902 "MOVSD $dst,$src\t# 
float\n" 6903 "skip:" %} 6904 ins_encode %{ 6905 Label skip; 6906 // Invert sense of branch from sense of CMOV 6907 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6908 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6909 __ bind(skip); 6910 %} 6911 ins_pipe( pipe_slow ); 6912 %} 6913 6914 // unsigned version 6915 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 6916 predicate (UseSSE>=1); 6917 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6918 ins_cost(200); 6919 format %{ "Jn$cop skip\n\t" 6920 "MOVSS $dst,$src\t# float\n" 6921 "skip:" %} 6922 ins_encode %{ 6923 Label skip; 6924 // Invert sense of branch from sense of CMOV 6925 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6926 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6927 __ bind(skip); 6928 %} 6929 ins_pipe( pipe_slow ); 6930 %} 6931 6932 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 6933 predicate (UseSSE>=1); 6934 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6935 ins_cost(200); 6936 expand %{ 6937 fcmovF_regU(cop, cr, dst, src); 6938 %} 6939 %} 6940 6941 // unsigned version 6942 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 6943 predicate (UseSSE>=2); 6944 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6945 ins_cost(200); 6946 format %{ "Jn$cop skip\n\t" 6947 "MOVSD $dst,$src\t# float\n" 6948 "skip:" %} 6949 ins_encode %{ 6950 Label skip; 6951 // Invert sense of branch from sense of CMOV 6952 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6953 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6954 __ bind(skip); 6955 %} 6956 ins_pipe( pipe_slow ); 6957 %} 6958 6959 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 6960 predicate (UseSSE>=2); 6961 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6962 ins_cost(200); 6963 expand %{ 6964 fcmovD_regU(cop, cr, dst, src); 6965 %} 6966 %} 6967 6968 instruct cmovL_reg(cmpOp cop, 
eFlagsReg cr, eRegL dst, eRegL src) %{ 6969 predicate(VM_Version::supports_cmov() ); 6970 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6971 ins_cost(200); 6972 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 6973 "CMOV$cop $dst.hi,$src.hi" %} 6974 opcode(0x0F,0x40); 6975 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 6976 ins_pipe( pipe_cmov_reg_long ); 6977 %} 6978 6979 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ 6980 predicate(VM_Version::supports_cmov() ); 6981 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6982 ins_cost(200); 6983 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 6984 "CMOV$cop $dst.hi,$src.hi" %} 6985 opcode(0x0F,0x40); 6986 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 6987 ins_pipe( pipe_cmov_reg_long ); 6988 %} 6989 6990 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ 6991 predicate(VM_Version::supports_cmov() ); 6992 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6993 ins_cost(200); 6994 expand %{ 6995 cmovL_regU(cop, cr, dst, src); 6996 %} 6997 %} 6998 6999 //----------Arithmetic Instructions-------------------------------------------- 7000 //----------Addition Instructions---------------------------------------------- 7001 7002 // Integer Addition Instructions 7003 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7004 match(Set dst (AddI dst src)); 7005 effect(KILL cr); 7006 7007 size(2); 7008 format %{ "ADD $dst,$src" %} 7009 opcode(0x03); 7010 ins_encode( OpcP, RegReg( dst, src) ); 7011 ins_pipe( ialu_reg_reg ); 7012 %} 7013 7014 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7015 match(Set dst (AddI dst src)); 7016 effect(KILL cr); 7017 7018 format %{ "ADD $dst,$src" %} 7019 opcode(0x81, 0x00); /* /0 id */ 7020 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7021 ins_pipe( ialu_reg ); 7022 %} 7023 7024 instruct incI_eReg(rRegI dst, immI1 src, 
eFlagsReg cr) %{ 7025 predicate(UseIncDec); 7026 match(Set dst (AddI dst src)); 7027 effect(KILL cr); 7028 7029 size(1); 7030 format %{ "INC $dst" %} 7031 opcode(0x40); /* */ 7032 ins_encode( Opc_plus( primary, dst ) ); 7033 ins_pipe( ialu_reg ); 7034 %} 7035 7036 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 7037 match(Set dst (AddI src0 src1)); 7038 ins_cost(110); 7039 7040 format %{ "LEA $dst,[$src0 + $src1]" %} 7041 opcode(0x8D); /* 0x8D /r */ 7042 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7043 ins_pipe( ialu_reg_reg ); 7044 %} 7045 7046 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 7047 match(Set dst (AddP src0 src1)); 7048 ins_cost(110); 7049 7050 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 7051 opcode(0x8D); /* 0x8D /r */ 7052 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7053 ins_pipe( ialu_reg_reg ); 7054 %} 7055 7056 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 7057 predicate(UseIncDec); 7058 match(Set dst (AddI dst src)); 7059 effect(KILL cr); 7060 7061 size(1); 7062 format %{ "DEC $dst" %} 7063 opcode(0x48); /* */ 7064 ins_encode( Opc_plus( primary, dst ) ); 7065 ins_pipe( ialu_reg ); 7066 %} 7067 7068 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 7069 match(Set dst (AddP dst src)); 7070 effect(KILL cr); 7071 7072 size(2); 7073 format %{ "ADD $dst,$src" %} 7074 opcode(0x03); 7075 ins_encode( OpcP, RegReg( dst, src) ); 7076 ins_pipe( ialu_reg_reg ); 7077 %} 7078 7079 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 7080 match(Set dst (AddP dst src)); 7081 effect(KILL cr); 7082 7083 format %{ "ADD $dst,$src" %} 7084 opcode(0x81,0x00); /* Opcode 81 /0 id */ 7085 // ins_encode( RegImm( dst, src) ); 7086 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7087 ins_pipe( ialu_reg ); 7088 %} 7089 7090 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7091 match(Set dst (AddI dst (LoadI src))); 7092 effect(KILL cr); 7093 7094 ins_cost(125); 7095 format %{ "ADD $dst,$src" 
%} 7096 opcode(0x03); 7097 ins_encode( OpcP, RegMem( dst, src) ); 7098 ins_pipe( ialu_reg_mem ); 7099 %} 7100 7101 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7102 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7103 effect(KILL cr); 7104 7105 ins_cost(150); 7106 format %{ "ADD $dst,$src" %} 7107 opcode(0x01); /* Opcode 01 /r */ 7108 ins_encode( OpcP, RegMem( src, dst ) ); 7109 ins_pipe( ialu_mem_reg ); 7110 %} 7111 7112 // Add Memory with Immediate 7113 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7114 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7115 effect(KILL cr); 7116 7117 ins_cost(125); 7118 format %{ "ADD $dst,$src" %} 7119 opcode(0x81); /* Opcode 81 /0 id */ 7120 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7121 ins_pipe( ialu_mem_imm ); 7122 %} 7123 7124 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7125 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7126 effect(KILL cr); 7127 7128 ins_cost(125); 7129 format %{ "INC $dst" %} 7130 opcode(0xFF); /* Opcode FF /0 */ 7131 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7132 ins_pipe( ialu_mem_imm ); 7133 %} 7134 7135 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7136 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7137 effect(KILL cr); 7138 7139 ins_cost(125); 7140 format %{ "DEC $dst" %} 7141 opcode(0xFF); /* Opcode FF /1 */ 7142 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7143 ins_pipe( ialu_mem_imm ); 7144 %} 7145 7146 7147 instruct checkCastPP( eRegP dst ) %{ 7148 match(Set dst (CheckCastPP dst)); 7149 7150 size(0); 7151 format %{ "#checkcastPP of $dst" %} 7152 ins_encode( /*empty encoding*/ ); 7153 ins_pipe( empty ); 7154 %} 7155 7156 instruct castPP( eRegP dst ) %{ 7157 match(Set dst (CastPP dst)); 7158 format %{ "#castPP of $dst" %} 7159 ins_encode( /*empty encoding*/ ); 7160 ins_pipe( empty ); 7161 %} 7162 7163 instruct castII( rRegI dst ) %{ 7164 match(Set dst (CastII dst)); 7165 format %{ "#castII of $dst" %} 
7166 ins_encode( /*empty encoding*/ ); 7167 ins_cost(0); 7168 ins_pipe( empty ); 7169 %} 7170 7171 7172 // Load-locked - same as a regular pointer load when used with compare-swap 7173 instruct loadPLocked(eRegP dst, memory mem) %{ 7174 match(Set dst (LoadPLocked mem)); 7175 7176 ins_cost(125); 7177 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7178 opcode(0x8B); 7179 ins_encode( OpcP, RegMem(dst,mem)); 7180 ins_pipe( ialu_reg_mem ); 7181 %} 7182 7183 // Conditional-store of the updated heap-top. 7184 // Used during allocation of the shared heap. 7185 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7186 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7187 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7188 // EAX is killed if there is contention, but then it's also unused. 7189 // In the common case of no contention, EAX holds the new oop address. 7190 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7191 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7192 ins_pipe( pipe_cmpxchg ); 7193 %} 7194 7195 // Conditional-store of an int value. 7196 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 7197 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ 7198 match(Set cr (StoreIConditional mem (Binary oldval newval))); 7199 effect(KILL oldval); 7200 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} 7201 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); 7202 ins_pipe( pipe_cmpxchg ); 7203 %} 7204 7205 // Conditional-store of a long value. 7206 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 
7207 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7208 match(Set cr (StoreLConditional mem (Binary oldval newval))); 7209 effect(KILL oldval); 7210 format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t" 7211 "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t" 7212 "XCHG EBX,ECX" 7213 %} 7214 ins_encode %{ 7215 // Note: we need to swap rbx, and rcx before and after the 7216 // cmpxchg8 instruction because the instruction uses 7217 // rcx as the high order word of the new value to store but 7218 // our register encoding uses rbx. 7219 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7220 if( os::is_MP() ) 7221 __ lock(); 7222 __ cmpxchg8($mem$$Address); 7223 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7224 %} 7225 ins_pipe( pipe_cmpxchg ); 7226 %} 7227 7228 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7229 7230 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7231 predicate(VM_Version::supports_cx8()); 7232 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7233 effect(KILL cr, KILL oldval); 7234 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7235 "MOV $res,0\n\t" 7236 "JNE,s fail\n\t" 7237 "MOV $res,1\n" 7238 "fail:" %} 7239 ins_encode( enc_cmpxchg8(mem_ptr), 7240 enc_flags_ne_to_boolean(res) ); 7241 ins_pipe( pipe_cmpxchg ); 7242 %} 7243 7244 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7245 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7246 effect(KILL cr, KILL oldval); 7247 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7248 "MOV $res,0\n\t" 7249 "JNE,s fail\n\t" 7250 "MOV $res,1\n" 7251 "fail:" %} 7252 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7253 
ins_pipe( pipe_cmpxchg ); 7254 %} 7255 7256 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7257 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7258 effect(KILL cr, KILL oldval); 7259 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7260 "MOV $res,0\n\t" 7261 "JNE,s fail\n\t" 7262 "MOV $res,1\n" 7263 "fail:" %} 7264 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7265 ins_pipe( pipe_cmpxchg ); 7266 %} 7267 7268 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7269 predicate(n->as_LoadStore()->result_not_used()); 7270 match(Set dummy (GetAndAddI mem add)); 7271 effect(KILL cr); 7272 format %{ "ADDL [$mem],$add" %} 7273 ins_encode %{ 7274 if (os::is_MP()) { __ lock(); } 7275 __ addl($mem$$Address, $add$$constant); 7276 %} 7277 ins_pipe( pipe_cmpxchg ); 7278 %} 7279 7280 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7281 match(Set newval (GetAndAddI mem newval)); 7282 effect(KILL cr); 7283 format %{ "XADDL [$mem],$newval" %} 7284 ins_encode %{ 7285 if (os::is_MP()) { __ lock(); } 7286 __ xaddl($mem$$Address, $newval$$Register); 7287 %} 7288 ins_pipe( pipe_cmpxchg ); 7289 %} 7290 7291 instruct xchgI( memory mem, rRegI newval) %{ 7292 match(Set newval (GetAndSetI mem newval)); 7293 format %{ "XCHGL $newval,[$mem]" %} 7294 ins_encode %{ 7295 __ xchgl($newval$$Register, $mem$$Address); 7296 %} 7297 ins_pipe( pipe_cmpxchg ); 7298 %} 7299 7300 instruct xchgP( memory mem, pRegP newval) %{ 7301 match(Set newval (GetAndSetP mem newval)); 7302 format %{ "XCHGL $newval,[$mem]" %} 7303 ins_encode %{ 7304 __ xchgl($newval$$Register, $mem$$Address); 7305 %} 7306 ins_pipe( pipe_cmpxchg ); 7307 %} 7308 7309 //----------Subtraction Instructions------------------------------------------- 7310 7311 // Integer Subtraction Instructions 7312 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7313 
match(Set dst (SubI dst src)); 7314 effect(KILL cr); 7315 7316 size(2); 7317 format %{ "SUB $dst,$src" %} 7318 opcode(0x2B); 7319 ins_encode( OpcP, RegReg( dst, src) ); 7320 ins_pipe( ialu_reg_reg ); 7321 %} 7322 7323 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7324 match(Set dst (SubI dst src)); 7325 effect(KILL cr); 7326 7327 format %{ "SUB $dst,$src" %} 7328 opcode(0x81,0x05); /* Opcode 81 /5 */ 7329 // ins_encode( RegImm( dst, src) ); 7330 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7331 ins_pipe( ialu_reg ); 7332 %} 7333 7334 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7335 match(Set dst (SubI dst (LoadI src))); 7336 effect(KILL cr); 7337 7338 ins_cost(125); 7339 format %{ "SUB $dst,$src" %} 7340 opcode(0x2B); 7341 ins_encode( OpcP, RegMem( dst, src) ); 7342 ins_pipe( ialu_reg_mem ); 7343 %} 7344 7345 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7346 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7347 effect(KILL cr); 7348 7349 ins_cost(150); 7350 format %{ "SUB $dst,$src" %} 7351 opcode(0x29); /* Opcode 29 /r */ 7352 ins_encode( OpcP, RegMem( src, dst ) ); 7353 ins_pipe( ialu_mem_reg ); 7354 %} 7355 7356 // Subtract from a pointer 7357 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{ 7358 match(Set dst (AddP dst (SubI zero src))); 7359 effect(KILL cr); 7360 7361 size(2); 7362 format %{ "SUB $dst,$src" %} 7363 opcode(0x2B); 7364 ins_encode( OpcP, RegReg( dst, src) ); 7365 ins_pipe( ialu_reg_reg ); 7366 %} 7367 7368 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{ 7369 match(Set dst (SubI zero dst)); 7370 effect(KILL cr); 7371 7372 size(2); 7373 format %{ "NEG $dst" %} 7374 opcode(0xF7,0x03); // Opcode F7 /3 7375 ins_encode( OpcP, RegOpc( dst ) ); 7376 ins_pipe( ialu_reg ); 7377 %} 7378 7379 //----------Multiplication/Division Instructions------------------------------- 7380 // Integer Multiplication Instructions 7381 // Multiply Register 7382 instruct 
mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7383 match(Set dst (MulI dst src)); 7384 effect(KILL cr); 7385 7386 size(3); 7387 ins_cost(300); 7388 format %{ "IMUL $dst,$src" %} 7389 opcode(0xAF, 0x0F); 7390 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7391 ins_pipe( ialu_reg_reg_alu0 ); 7392 %} 7393 7394 // Multiply 32-bit Immediate 7395 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7396 match(Set dst (MulI src imm)); 7397 effect(KILL cr); 7398 7399 ins_cost(300); 7400 format %{ "IMUL $dst,$src,$imm" %} 7401 opcode(0x69); /* 69 /r id */ 7402 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7403 ins_pipe( ialu_reg_reg_alu0 ); 7404 %} 7405 7406 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7407 match(Set dst src); 7408 effect(KILL cr); 7409 7410 // Note that this is artificially increased to make it more expensive than loadConL 7411 ins_cost(250); 7412 format %{ "MOV EAX,$src\t// low word only" %} 7413 opcode(0xB8); 7414 ins_encode( LdImmL_Lo(dst, src) ); 7415 ins_pipe( ialu_reg_fat ); 7416 %} 7417 7418 // Multiply by 32-bit Immediate, taking the shifted high order results 7419 // (special case for shift by 32) 7420 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7421 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7422 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7423 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7424 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7425 effect(USE src1, KILL cr); 7426 7427 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7428 ins_cost(0*100 + 1*400 - 150); 7429 format %{ "IMUL EDX:EAX,$src1" %} 7430 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7431 ins_pipe( pipe_slow ); 7432 %} 7433 7434 // Multiply 
by 32-bit Immediate, taking the shifted high order results 7435 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7436 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7437 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7438 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7439 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7440 effect(USE src1, KILL cr); 7441 7442 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7443 ins_cost(1*100 + 1*400 - 150); 7444 format %{ "IMUL EDX:EAX,$src1\n\t" 7445 "SAR EDX,$cnt-32" %} 7446 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7447 ins_pipe( pipe_slow ); 7448 %} 7449 7450 // Multiply Memory 32-bit Immediate 7451 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7452 match(Set dst (MulI (LoadI src) imm)); 7453 effect(KILL cr); 7454 7455 ins_cost(300); 7456 format %{ "IMUL $dst,$src,$imm" %} 7457 opcode(0x69); /* 69 /r id */ 7458 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 7459 ins_pipe( ialu_reg_mem_alu0 ); 7460 %} 7461 7462 // Multiply Memory 7463 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7464 match(Set dst (MulI dst (LoadI src))); 7465 effect(KILL cr); 7466 7467 ins_cost(350); 7468 format %{ "IMUL $dst,$src" %} 7469 opcode(0xAF, 0x0F); 7470 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 7471 ins_pipe( ialu_reg_mem_alu0 ); 7472 %} 7473 7474 // Multiply Register Int to Long 7475 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7476 // Basic Idea: long = (long)int * (long)int 7477 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7478 effect(DEF dst, USE src, USE src1, KILL flags); 7479 7480 ins_cost(300); 7481 format %{ "IMUL $dst,$src1" %} 7482 7483 ins_encode( long_int_multiply( dst, src1 
) ); 7484 ins_pipe( ialu_reg_reg_alu0 ); 7485 %} 7486 7487 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7488 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7489 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7490 effect(KILL flags); 7491 7492 ins_cost(300); 7493 format %{ "MUL $dst,$src1" %} 7494 7495 ins_encode( long_uint_multiply(dst, src1) ); 7496 ins_pipe( ialu_reg_reg_alu0 ); 7497 %} 7498 7499 // Multiply Register Long 7500 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7501 match(Set dst (MulL dst src)); 7502 effect(KILL cr, TEMP tmp); 7503 ins_cost(4*100+3*400); 7504 // Basic idea: lo(result) = lo(x_lo * y_lo) 7505 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7506 format %{ "MOV $tmp,$src.lo\n\t" 7507 "IMUL $tmp,EDX\n\t" 7508 "MOV EDX,$src.hi\n\t" 7509 "IMUL EDX,EAX\n\t" 7510 "ADD $tmp,EDX\n\t" 7511 "MUL EDX:EAX,$src.lo\n\t" 7512 "ADD EDX,$tmp" %} 7513 ins_encode( long_multiply( dst, src, tmp ) ); 7514 ins_pipe( pipe_slow ); 7515 %} 7516 7517 // Multiply Register Long where the left operand's high 32 bits are zero 7518 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7519 predicate(is_operand_hi32_zero(n->in(1))); 7520 match(Set dst (MulL dst src)); 7521 effect(KILL cr, TEMP tmp); 7522 ins_cost(2*100+2*400); 7523 // Basic idea: lo(result) = lo(x_lo * y_lo) 7524 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7525 format %{ "MOV $tmp,$src.hi\n\t" 7526 "IMUL $tmp,EAX\n\t" 7527 "MUL EDX:EAX,$src.lo\n\t" 7528 "ADD EDX,$tmp" %} 7529 ins_encode %{ 7530 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7531 __ imull($tmp$$Register, rax); 7532 __ mull($src$$Register); 7533 __ addl(rdx, $tmp$$Register); 7534 %} 7535 ins_pipe( pipe_slow ); 7536 %} 7537 7538 // Multiply Register Long where the right operand's high 32 bits are zero 7539 instruct 
mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7540 predicate(is_operand_hi32_zero(n->in(2))); 7541 match(Set dst (MulL dst src)); 7542 effect(KILL cr, TEMP tmp); 7543 ins_cost(2*100+2*400); 7544 // Basic idea: lo(result) = lo(x_lo * y_lo) 7545 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7546 format %{ "MOV $tmp,$src.lo\n\t" 7547 "IMUL $tmp,EDX\n\t" 7548 "MUL EDX:EAX,$src.lo\n\t" 7549 "ADD EDX,$tmp" %} 7550 ins_encode %{ 7551 __ movl($tmp$$Register, $src$$Register); 7552 __ imull($tmp$$Register, rdx); 7553 __ mull($src$$Register); 7554 __ addl(rdx, $tmp$$Register); 7555 %} 7556 ins_pipe( pipe_slow ); 7557 %} 7558 7559 // Multiply Register Long where the left and the right operands' high 32 bits are zero 7560 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7561 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7562 match(Set dst (MulL dst src)); 7563 effect(KILL cr); 7564 ins_cost(1*400); 7565 // Basic idea: lo(result) = lo(x_lo * y_lo) 7566 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7567 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7568 ins_encode %{ 7569 __ mull($src$$Register); 7570 %} 7571 ins_pipe( pipe_slow ); 7572 %} 7573 7574 // Multiply Register Long by small constant 7575 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7576 match(Set dst (MulL dst src)); 7577 effect(KILL cr, TEMP tmp); 7578 ins_cost(2*100+2*400); 7579 size(12); 7580 // Basic idea: lo(result) = lo(src * EAX) 7581 // hi(result) = hi(src * EAX) + lo(src * EDX) 7582 format %{ "IMUL $tmp,EDX,$src\n\t" 7583 "MOV EDX,$src\n\t" 7584 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7585 "ADD EDX,$tmp" %} 7586 ins_encode( long_multiply_con( dst, src, tmp ) ); 7587 ins_pipe( pipe_slow ); 7588 %} 7589 7590 // Integer DIV with Register 7591 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) 
%{ 7592 match(Set rax (DivI rax div)); 7593 effect(KILL rdx, KILL cr); 7594 size(26); 7595 ins_cost(30*100+10*100); 7596 format %{ "CMP EAX,0x80000000\n\t" 7597 "JNE,s normal\n\t" 7598 "XOR EDX,EDX\n\t" 7599 "CMP ECX,-1\n\t" 7600 "JE,s done\n" 7601 "normal: CDQ\n\t" 7602 "IDIV $div\n\t" 7603 "done:" %} 7604 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7605 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7606 ins_pipe( ialu_reg_reg_alu0 ); 7607 %} 7608 7609 // Divide Register Long 7610 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 7611 match(Set dst (DivL src1 src2)); 7612 effect( KILL cr, KILL cx, KILL bx ); 7613 ins_cost(10000); 7614 format %{ "PUSH $src1.hi\n\t" 7615 "PUSH $src1.lo\n\t" 7616 "PUSH $src2.hi\n\t" 7617 "PUSH $src2.lo\n\t" 7618 "CALL SharedRuntime::ldiv\n\t" 7619 "ADD ESP,16" %} 7620 ins_encode( long_div(src1,src2) ); 7621 ins_pipe( pipe_slow ); 7622 %} 7623 7624 // Integer DIVMOD with Register, both quotient and mod results 7625 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7626 match(DivModI rax div); 7627 effect(KILL cr); 7628 size(26); 7629 ins_cost(30*100+10*100); 7630 format %{ "CMP EAX,0x80000000\n\t" 7631 "JNE,s normal\n\t" 7632 "XOR EDX,EDX\n\t" 7633 "CMP ECX,-1\n\t" 7634 "JE,s done\n" 7635 "normal: CDQ\n\t" 7636 "IDIV $div\n\t" 7637 "done:" %} 7638 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7639 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7640 ins_pipe( pipe_slow ); 7641 %} 7642 7643 // Integer MOD with Register 7644 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7645 match(Set rdx (ModI rax div)); 7646 effect(KILL rax, KILL cr); 7647 7648 size(26); 7649 ins_cost(300); 7650 format %{ "CDQ\n\t" 7651 "IDIV $div" %} 7652 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7653 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7654 ins_pipe( ialu_reg_reg_alu0 ); 7655 %} 7656 7657 // Remainder Register Long 7658 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL 
src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 7659 match(Set dst (ModL src1 src2)); 7660 effect( KILL cr, KILL cx, KILL bx ); 7661 ins_cost(10000); 7662 format %{ "PUSH $src1.hi\n\t" 7663 "PUSH $src1.lo\n\t" 7664 "PUSH $src2.hi\n\t" 7665 "PUSH $src2.lo\n\t" 7666 "CALL SharedRuntime::lrem\n\t" 7667 "ADD ESP,16" %} 7668 ins_encode( long_mod(src1,src2) ); 7669 ins_pipe( pipe_slow ); 7670 %} 7671 7672 // Divide Register Long (no special case since divisor != -1) 7673 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7674 match(Set dst (DivL dst imm)); 7675 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7676 ins_cost(1000); 7677 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" 7678 "XOR $tmp2,$tmp2\n\t" 7679 "CMP $tmp,EDX\n\t" 7680 "JA,s fast\n\t" 7681 "MOV $tmp2,EAX\n\t" 7682 "MOV EAX,EDX\n\t" 7683 "MOV EDX,0\n\t" 7684 "JLE,s pos\n\t" 7685 "LNEG EAX : $tmp2\n\t" 7686 "DIV $tmp # unsigned division\n\t" 7687 "XCHG EAX,$tmp2\n\t" 7688 "DIV $tmp\n\t" 7689 "LNEG $tmp2 : EAX\n\t" 7690 "JMP,s done\n" 7691 "pos:\n\t" 7692 "DIV $tmp\n\t" 7693 "XCHG EAX,$tmp2\n" 7694 "fast:\n\t" 7695 "DIV $tmp\n" 7696 "done:\n\t" 7697 "MOV EDX,$tmp2\n\t" 7698 "NEG EDX:EAX # if $imm < 0" %} 7699 ins_encode %{ 7700 int con = (int)$imm$$constant; 7701 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7702 int pcon = (con > 0) ? con : -con; 7703 Label Lfast, Lpos, Ldone; 7704 7705 __ movl($tmp$$Register, pcon); 7706 __ xorl($tmp2$$Register,$tmp2$$Register); 7707 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7708 __ jccb(Assembler::above, Lfast); // result fits into 32 bit 7709 7710 __ movl($tmp2$$Register, $dst$$Register); // save 7711 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7712 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7713 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7714 7715 // Negative dividend. 
7716 // convert value to positive to use unsigned division 7717 __ lneg($dst$$Register, $tmp2$$Register); 7718 __ divl($tmp$$Register); 7719 __ xchgl($dst$$Register, $tmp2$$Register); 7720 __ divl($tmp$$Register); 7721 // revert result back to negative 7722 __ lneg($tmp2$$Register, $dst$$Register); 7723 __ jmpb(Ldone); 7724 7725 __ bind(Lpos); 7726 __ divl($tmp$$Register); // Use unsigned division 7727 __ xchgl($dst$$Register, $tmp2$$Register); 7728 // Fallthrough for final divide, tmp2 has 32 bit hi result 7729 7730 __ bind(Lfast); 7731 // fast path: src is positive 7732 __ divl($tmp$$Register); // Use unsigned division 7733 7734 __ bind(Ldone); 7735 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7736 if (con < 0) { 7737 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7738 } 7739 %} 7740 ins_pipe( pipe_slow ); 7741 %} 7742 7743 // Remainder Register Long (remainder fits into 32 bits) 7744 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7745 match(Set dst (ModL dst imm)); 7746 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7747 ins_cost(1000); 7748 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7749 "CMP $tmp,EDX\n\t" 7750 "JA,s fast\n\t" 7751 "MOV $tmp2,EAX\n\t" 7752 "MOV EAX,EDX\n\t" 7753 "MOV EDX,0\n\t" 7754 "JLE,s pos\n\t" 7755 "LNEG EAX : $tmp2\n\t" 7756 "DIV $tmp # unsigned division\n\t" 7757 "MOV EAX,$tmp2\n\t" 7758 "DIV $tmp\n\t" 7759 "NEG EDX\n\t" 7760 "JMP,s done\n" 7761 "pos:\n\t" 7762 "DIV $tmp\n\t" 7763 "MOV EAX,$tmp2\n" 7764 "fast:\n\t" 7765 "DIV $tmp\n" 7766 "done:\n\t" 7767 "MOV EAX,EDX\n\t" 7768 "SAR EDX,31\n\t" %} 7769 ins_encode %{ 7770 int con = (int)$imm$$constant; 7771 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7772 int pcon = (con > 0) ?
con : -con; 7773 Label Lfast, Lpos, Ldone; 7774 7775 __ movl($tmp$$Register, pcon); 7776 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7777 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7778 7779 __ movl($tmp2$$Register, $dst$$Register); // save 7780 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7781 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7782 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7783 7784 // Negative dividend. 7785 // convert value to positive to use unsigned division 7786 __ lneg($dst$$Register, $tmp2$$Register); 7787 __ divl($tmp$$Register); 7788 __ movl($dst$$Register, $tmp2$$Register); 7789 __ divl($tmp$$Register); 7790 // revert remainder back to negative 7791 __ negl(HIGH_FROM_LOW($dst$$Register)); 7792 __ jmpb(Ldone); 7793 7794 __ bind(Lpos); 7795 __ divl($tmp$$Register); 7796 __ movl($dst$$Register, $tmp2$$Register); 7797 7798 __ bind(Lfast); 7799 // fast path: src is positive 7800 __ divl($tmp$$Register); 7801 7802 __ bind(Ldone); 7803 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7804 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 7805 7806 %} 7807 ins_pipe( pipe_slow ); 7808 %} 7809 7810 // Integer Shift Instructions 7811 // Shift Left by one 7812 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7813 match(Set dst (LShiftI dst shift)); 7814 effect(KILL cr); 7815 7816 size(2); 7817 format %{ "SHL $dst,$shift" %} 7818 opcode(0xD1, 0x4); /* D1 /4 */ 7819 ins_encode( OpcP, RegOpc( dst ) ); 7820 ins_pipe( ialu_reg ); 7821 %} 7822 7823 // Shift Left by 8-bit immediate 7824 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7825 match(Set dst (LShiftI dst shift)); 7826 effect(KILL cr); 7827 7828 size(3); 7829 format %{ "SHL $dst,$shift" %} 7830 opcode(0xC1, 0x4); /* C1 /4 ib */ 7831 ins_encode( RegOpcImm( dst, shift) ); 7832 ins_pipe( ialu_reg ); 7833 %} 7834 7835 // Shift Left by variable 7836 instruct 
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7837 match(Set dst (LShiftI dst shift)); 7838 effect(KILL cr); 7839 7840 size(2); 7841 format %{ "SHL $dst,$shift" %} 7842 opcode(0xD3, 0x4); /* D3 /4 */ 7843 ins_encode( OpcP, RegOpc( dst ) ); 7844 ins_pipe( ialu_reg_reg ); 7845 %} 7846 7847 // Arithmetic shift right by one 7848 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7849 match(Set dst (RShiftI dst shift)); 7850 effect(KILL cr); 7851 7852 size(2); 7853 format %{ "SAR $dst,$shift" %} 7854 opcode(0xD1, 0x7); /* D1 /7 */ 7855 ins_encode( OpcP, RegOpc( dst ) ); 7856 ins_pipe( ialu_reg ); 7857 %} 7858 7859 // Arithmetic shift right by one 7860 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{ 7861 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7862 effect(KILL cr); 7863 format %{ "SAR $dst,$shift" %} 7864 opcode(0xD1, 0x7); /* D1 /7 */ 7865 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 7866 ins_pipe( ialu_mem_imm ); 7867 %} 7868 7869 // Arithmetic Shift Right by 8-bit immediate 7870 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7871 match(Set dst (RShiftI dst shift)); 7872 effect(KILL cr); 7873 7874 size(3); 7875 format %{ "SAR $dst,$shift" %} 7876 opcode(0xC1, 0x7); /* C1 /7 ib */ 7877 ins_encode( RegOpcImm( dst, shift ) ); 7878 ins_pipe( ialu_mem_imm ); 7879 %} 7880 7881 // Arithmetic Shift Right by 8-bit immediate 7882 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 7883 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7884 effect(KILL cr); 7885 7886 format %{ "SAR $dst,$shift" %} 7887 opcode(0xC1, 0x7); /* C1 /7 ib */ 7888 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 7889 ins_pipe( ialu_mem_imm ); 7890 %} 7891 7892 // Arithmetic Shift Right by variable 7893 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7894 match(Set dst (RShiftI dst shift)); 7895 effect(KILL cr); 7896 7897 size(2); 7898 format %{ "SAR $dst,$shift" %} 7899 
opcode(0xD3, 0x7); /* D3 /7 */ 7900 ins_encode( OpcP, RegOpc( dst ) ); 7901 ins_pipe( ialu_reg_reg ); 7902 %} 7903 7904 // Logical shift right by one 7905 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7906 match(Set dst (URShiftI dst shift)); 7907 effect(KILL cr); 7908 7909 size(2); 7910 format %{ "SHR $dst,$shift" %} 7911 opcode(0xD1, 0x5); /* D1 /5 */ 7912 ins_encode( OpcP, RegOpc( dst ) ); 7913 ins_pipe( ialu_reg ); 7914 %} 7915 7916 // Logical Shift Right by 8-bit immediate 7917 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7918 match(Set dst (URShiftI dst shift)); 7919 effect(KILL cr); 7920 7921 size(3); 7922 format %{ "SHR $dst,$shift" %} 7923 opcode(0xC1, 0x5); /* C1 /5 ib */ 7924 ins_encode( RegOpcImm( dst, shift) ); 7925 ins_pipe( ialu_reg ); 7926 %} 7927 7928 7929 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 7930 // This idiom is used by the compiler for the i2b bytecode. 7931 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 7932 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 7933 7934 size(3); 7935 format %{ "MOVSX $dst,$src :8" %} 7936 ins_encode %{ 7937 __ movsbl($dst$$Register, $src$$Register); 7938 %} 7939 ins_pipe(ialu_reg_reg); 7940 %} 7941 7942 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 7943 // This idiom is used by the compiler for the i2s bytecode.
// Convert int to short: matches the canonical (x << 16) >> 16 sign-extension idiom.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable (shift count in CL)
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2, matched from the (x ^ -1) & y idiom.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: extract lowest set bit, matched from the (0 - x) & x idiom.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); 8076 predicate(UseBMI1Instructions); 8077 effect(KILL cr); 8078 8079 ins_cost(125); 8080 format %{ "BLSIL $dst, $src" %} 8081 8082 ins_encode %{ 8083 __ blsil($dst$$Register, $src$$Address); 8084 %} 8085 ins_pipe(ialu_reg_mem); 8086 %} 8087 8088 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8089 %{ 8090 match(Set dst (XorI (AddI src minus_1) src)); 8091 predicate(UseBMI1Instructions); 8092 effect(KILL cr); 8093 8094 format %{ "BLSMSKL $dst, $src" %} 8095 8096 ins_encode %{ 8097 __ blsmskl($dst$$Register, $src$$Register); 8098 %} 8099 8100 ins_pipe(ialu_reg); 8101 %} 8102 8103 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8104 %{ 8105 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8106 predicate(UseBMI1Instructions); 8107 effect(KILL cr); 8108 8109 ins_cost(125); 8110 format %{ "BLSMSKL $dst, $src" %} 8111 8112 ins_encode %{ 8113 __ blsmskl($dst$$Register, $src$$Address); 8114 %} 8115 8116 ins_pipe(ialu_reg_mem); 8117 %} 8118 8119 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8120 %{ 8121 match(Set dst (AndI (AddI src minus_1) src) ); 8122 predicate(UseBMI1Instructions); 8123 effect(KILL cr); 8124 8125 format %{ "BLSRL $dst, $src" %} 8126 8127 ins_encode %{ 8128 __ blsrl($dst$$Register, $src$$Register); 8129 %} 8130 8131 ins_pipe(ialu_reg); 8132 %} 8133 8134 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8135 %{ 8136 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8137 predicate(UseBMI1Instructions); 8138 effect(KILL cr); 8139 8140 ins_cost(125); 8141 format %{ "BLSRL $dst, $src" %} 8142 8143 ins_encode %{ 8144 __ blsrl($dst$$Register, $src$$Address); 8145 %} 8146 8147 ins_pipe(ialu_reg_mem); 8148 %} 8149 8150 // Or Instructions 8151 // Or Register with Register 8152 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8153 match(Set dst 
(OrI dst src)); 8154 effect(KILL cr); 8155 8156 size(2); 8157 format %{ "OR $dst,$src" %} 8158 opcode(0x0B); 8159 ins_encode( OpcP, RegReg( dst, src) ); 8160 ins_pipe( ialu_reg_reg ); 8161 %} 8162 8163 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8164 match(Set dst (OrI dst (CastP2X src))); 8165 effect(KILL cr); 8166 8167 size(2); 8168 format %{ "OR $dst,$src" %} 8169 opcode(0x0B); 8170 ins_encode( OpcP, RegReg( dst, src) ); 8171 ins_pipe( ialu_reg_reg ); 8172 %} 8173 8174 8175 // Or Register with Immediate 8176 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8177 match(Set dst (OrI dst src)); 8178 effect(KILL cr); 8179 8180 format %{ "OR $dst,$src" %} 8181 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8182 // ins_encode( RegImm( dst, src) ); 8183 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8184 ins_pipe( ialu_reg ); 8185 %} 8186 8187 // Or Register with Memory 8188 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8189 match(Set dst (OrI dst (LoadI src))); 8190 effect(KILL cr); 8191 8192 ins_cost(125); 8193 format %{ "OR $dst,$src" %} 8194 opcode(0x0B); 8195 ins_encode( OpcP, RegMem( dst, src) ); 8196 ins_pipe( ialu_reg_mem ); 8197 %} 8198 8199 // Or Memory with Register 8200 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8201 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8202 effect(KILL cr); 8203 8204 ins_cost(150); 8205 format %{ "OR $dst,$src" %} 8206 opcode(0x09); /* Opcode 09 /r */ 8207 ins_encode( OpcP, RegMem( src, dst ) ); 8208 ins_pipe( ialu_mem_reg ); 8209 %} 8210 8211 // Or Memory with Immediate 8212 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8213 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8214 effect(KILL cr); 8215 8216 ins_cost(125); 8217 format %{ "OR $dst,$src" %} 8218 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8219 // ins_encode( MemImm( dst, src) ); 8220 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8221 ins_pipe( ialu_mem_imm ); 
8222 %} 8223 8224 // ROL/ROR 8225 // ROL expand 8226 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8227 effect(USE_DEF dst, USE shift, KILL cr); 8228 8229 format %{ "ROL $dst, $shift" %} 8230 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8231 ins_encode( OpcP, RegOpc( dst )); 8232 ins_pipe( ialu_reg ); 8233 %} 8234 8235 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8236 effect(USE_DEF dst, USE shift, KILL cr); 8237 8238 format %{ "ROL $dst, $shift" %} 8239 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8240 ins_encode( RegOpcImm(dst, shift) ); 8241 ins_pipe(ialu_reg); 8242 %} 8243 8244 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8245 effect(USE_DEF dst, USE shift, KILL cr); 8246 8247 format %{ "ROL $dst, $shift" %} 8248 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8249 ins_encode(OpcP, RegOpc(dst)); 8250 ins_pipe( ialu_reg_reg ); 8251 %} 8252 // end of ROL expand 8253 8254 // ROL 32bit by one once 8255 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8256 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8257 8258 expand %{ 8259 rolI_eReg_imm1(dst, lshift, cr); 8260 %} 8261 %} 8262 8263 // ROL 32bit var by imm8 once 8264 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8265 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8266 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8267 8268 expand %{ 8269 rolI_eReg_imm8(dst, lshift, cr); 8270 %} 8271 %} 8272 8273 // ROL 32bit var by var once 8274 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8275 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8276 8277 expand %{ 8278 rolI_eReg_CL(dst, shift, cr); 8279 %} 8280 %} 8281 8282 // ROL 32bit var by var once 8283 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8284 match(Set dst ( OrI (LShiftI dst shift) (URShiftI 
dst (SubI c32 shift)))); 8285 8286 expand %{ 8287 rolI_eReg_CL(dst, shift, cr); 8288 %} 8289 %} 8290 8291 // ROR expand 8292 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8293 effect(USE_DEF dst, USE shift, KILL cr); 8294 8295 format %{ "ROR $dst, $shift" %} 8296 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8297 ins_encode( OpcP, RegOpc( dst ) ); 8298 ins_pipe( ialu_reg ); 8299 %} 8300 8301 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8302 effect (USE_DEF dst, USE shift, KILL cr); 8303 8304 format %{ "ROR $dst, $shift" %} 8305 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8306 ins_encode( RegOpcImm(dst, shift) ); 8307 ins_pipe( ialu_reg ); 8308 %} 8309 8310 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8311 effect(USE_DEF dst, USE shift, KILL cr); 8312 8313 format %{ "ROR $dst, $shift" %} 8314 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8315 ins_encode(OpcP, RegOpc(dst)); 8316 ins_pipe( ialu_reg_reg ); 8317 %} 8318 // end of ROR expand 8319 8320 // ROR right once 8321 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8322 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8323 8324 expand %{ 8325 rorI_eReg_imm1(dst, rshift, cr); 8326 %} 8327 %} 8328 8329 // ROR 32bit by immI8 once 8330 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8331 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8332 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8333 8334 expand %{ 8335 rorI_eReg_imm8(dst, rshift, cr); 8336 %} 8337 %} 8338 8339 // ROR 32bit var by var once 8340 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8341 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8342 8343 expand %{ 8344 rorI_eReg_CL(dst, shift, cr); 8345 %} 8346 %} 8347 8348 // ROR 32bit var by var once 8349 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, 
eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// NOT does not touch EFLAGS, so no KILL cr here.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG sets CF when dst was non-zero; ADC dst,src then yields non-zero iff src != 0.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// CmpLTMask: dst = (p < q) ? -1 : 0, materialized via SETlt + NEG.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Note: an unused 'Label done;' local was removed here (dead code —
    // this encoding is branch-free and never bound or jumped to the label).
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: just an arithmetic shift broadcasting the sign bit.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
8569 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ 8570 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 8571 */ 8572 //----------Overflow Math Instructions----------------------------------------- 8573 8574 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8575 %{ 8576 match(Set cr (OverflowAddI op1 op2)); 8577 effect(DEF cr, USE_KILL op1, USE op2); 8578 8579 format %{ "ADD $op1, $op2\t# overflow check int" %} 8580 8581 ins_encode %{ 8582 __ addl($op1$$Register, $op2$$Register); 8583 %} 8584 ins_pipe(ialu_reg_reg); 8585 %} 8586 8587 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) 8588 %{ 8589 match(Set cr (OverflowAddI op1 op2)); 8590 effect(DEF cr, USE_KILL op1, USE op2); 8591 8592 format %{ "ADD $op1, $op2\t# overflow check int" %} 8593 8594 ins_encode %{ 8595 __ addl($op1$$Register, $op2$$constant); 8596 %} 8597 ins_pipe(ialu_reg_reg); 8598 %} 8599 8600 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) 8601 %{ 8602 match(Set cr (OverflowSubI op1 op2)); 8603 8604 format %{ "CMP $op1, $op2\t# overflow check int" %} 8605 ins_encode %{ 8606 __ cmpl($op1$$Register, $op2$$Register); 8607 %} 8608 ins_pipe(ialu_reg_reg); 8609 %} 8610 8611 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) 8612 %{ 8613 match(Set cr (OverflowSubI op1 op2)); 8614 8615 format %{ "CMP $op1, $op2\t# overflow check int" %} 8616 ins_encode %{ 8617 __ cmpl($op1$$Register, $op2$$constant); 8618 %} 8619 ins_pipe(ialu_reg_reg); 8620 %} 8621 8622 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2) 8623 %{ 8624 match(Set cr (OverflowSubI zero op2)); 8625 effect(DEF cr, USE_KILL op2); 8626 8627 format %{ "NEG $op2\t# overflow check int" %} 8628 ins_encode %{ 8629 __ negl($op2$$Register); 8630 %} 8631 ins_pipe(ialu_reg_reg); 8632 %} 8633 8634 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8635 %{ 8636 match(Set cr (OverflowMulI op1 op2)); 8637 
effect(DEF cr, USE_KILL op1, USE op2); 8638 8639 format %{ "IMUL $op1, $op2\t# overflow check int" %} 8640 ins_encode %{ 8641 __ imull($op1$$Register, $op2$$Register); 8642 %} 8643 ins_pipe(ialu_reg_reg_alu0); 8644 %} 8645 8646 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) 8647 %{ 8648 match(Set cr (OverflowMulI op1 op2)); 8649 effect(DEF cr, TEMP tmp, USE op1, USE op2); 8650 8651 format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} 8652 ins_encode %{ 8653 __ imull($tmp$$Register, $op1$$Register, $op2$$constant); 8654 %} 8655 ins_pipe(ialu_reg_reg_alu0); 8656 %} 8657 8658 //----------Long Instructions------------------------------------------------ 8659 // Add Long Register with Register 8660 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8661 match(Set dst (AddL dst src)); 8662 effect(KILL cr); 8663 ins_cost(200); 8664 format %{ "ADD $dst.lo,$src.lo\n\t" 8665 "ADC $dst.hi,$src.hi" %} 8666 opcode(0x03, 0x13); 8667 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8668 ins_pipe( ialu_reg_reg_long ); 8669 %} 8670 8671 // Add Long Register with Immediate 8672 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8673 match(Set dst (AddL dst src)); 8674 effect(KILL cr); 8675 format %{ "ADD $dst.lo,$src.lo\n\t" 8676 "ADC $dst.hi,$src.hi" %} 8677 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */ 8678 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8679 ins_pipe( ialu_reg_long ); 8680 %} 8681 8682 // Add Long Register with Memory 8683 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8684 match(Set dst (AddL dst (LoadL mem))); 8685 effect(KILL cr); 8686 ins_cost(125); 8687 format %{ "ADD $dst.lo,$mem\n\t" 8688 "ADC $dst.hi,$mem+4" %} 8689 opcode(0x03, 0x13); 8690 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8691 ins_pipe( ialu_reg_long_mem ); 8692 %} 8693 8694 // Subtract Long Register with Register. 
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Long negation: 0 - dst.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long ANDN: applied independently to the low and high register halves.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // High half of the 64-bit memory operand is at disp+4.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSI: if the low half has any set bit (ZF clear), the high result is 0;
// otherwise the lowest set bit is in the high half.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSMSK: CF from the low-half BLSMSK tells whether the mask spills
// into the high half.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // High half of the 64-bit memory operand is at disp+4.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Long BLSR: CF from the low-half BLSR tells whether the reset bit was in
// the high half; otherwise the high half is copied through unchanged.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// NOT does not touch EFLAGS, so no KILL cr here.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1 — expressed as ADD/ADC to shift the pair left once.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}


//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
"SAHF\n" 9253 "exit:\tNOP // avoid branch to branch" %} 9254 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9255 ins_encode( Push_Reg_DPR(src1), 9256 OpcP, RegOpc(src2), 9257 cmpF_P6_fixup ); 9258 ins_pipe( pipe_slow ); 9259 %} 9260 9261 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9262 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9263 match(Set cr (CmpD src1 src2)); 9264 ins_cost(150); 9265 format %{ "FLD $src1\n\t" 9266 "FUCOMIP ST,$src2 // P6 instruction" %} 9267 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9268 ins_encode( Push_Reg_DPR(src1), 9269 OpcP, RegOpc(src2)); 9270 ins_pipe( pipe_slow ); 9271 %} 9272 9273 // Compare & branch 9274 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9275 predicate(UseSSE<=1); 9276 match(Set cr (CmpD src1 src2)); 9277 effect(KILL rax); 9278 ins_cost(200); 9279 format %{ "FLD $src1\n\t" 9280 "FCOMp $src2\n\t" 9281 "FNSTSW AX\n\t" 9282 "TEST AX,0x400\n\t" 9283 "JZ,s flags\n\t" 9284 "MOV AH,1\t# unordered treat as LT\n" 9285 "flags:\tSAHF" %} 9286 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9287 ins_encode( Push_Reg_DPR(src1), 9288 OpcP, RegOpc(src2), 9289 fpu_flags); 9290 ins_pipe( pipe_slow ); 9291 %} 9292 9293 // Compare vs zero into -1,0,1 9294 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9295 predicate(UseSSE<=1); 9296 match(Set dst (CmpD3 src1 zero)); 9297 effect(KILL cr, KILL rax); 9298 ins_cost(280); 9299 format %{ "FTSTD $dst,$src1" %} 9300 opcode(0xE4, 0xD9); 9301 ins_encode( Push_Reg_DPR(src1), 9302 OpcS, OpcP, PopFPU, 9303 CmpF_Result(dst)); 9304 ins_pipe( pipe_slow ); 9305 %} 9306 9307 // Compare into -1,0,1 9308 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9309 predicate(UseSSE<=1); 9310 match(Set dst (CmpD3 src1 src2)); 9311 effect(KILL cr, KILL rax); 9312 ins_cost(300); 9313 format %{ "FCMPD $dst,$src1,$src2" %} 9314 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9315 ins_encode( 
Push_Reg_DPR(src1), 9316 OpcP, RegOpc(src2), 9317 CmpF_Result(dst)); 9318 ins_pipe( pipe_slow ); 9319 %} 9320 9321 // float compare and set condition codes in EFLAGS by XMM regs 9322 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9323 predicate(UseSSE>=2); 9324 match(Set cr (CmpD src1 src2)); 9325 ins_cost(145); 9326 format %{ "UCOMISD $src1,$src2\n\t" 9327 "JNP,s exit\n\t" 9328 "PUSHF\t# saw NaN, set CF\n\t" 9329 "AND [rsp], #0xffffff2b\n\t" 9330 "POPF\n" 9331 "exit:" %} 9332 ins_encode %{ 9333 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9334 emit_cmpfp_fixup(_masm); 9335 %} 9336 ins_pipe( pipe_slow ); 9337 %} 9338 9339 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9340 predicate(UseSSE>=2); 9341 match(Set cr (CmpD src1 src2)); 9342 ins_cost(100); 9343 format %{ "UCOMISD $src1,$src2" %} 9344 ins_encode %{ 9345 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9346 %} 9347 ins_pipe( pipe_slow ); 9348 %} 9349 9350 // float compare and set condition codes in EFLAGS by XMM regs 9351 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9352 predicate(UseSSE>=2); 9353 match(Set cr (CmpD src1 (LoadD src2))); 9354 ins_cost(145); 9355 format %{ "UCOMISD $src1,$src2\n\t" 9356 "JNP,s exit\n\t" 9357 "PUSHF\t# saw NaN, set CF\n\t" 9358 "AND [rsp], #0xffffff2b\n\t" 9359 "POPF\n" 9360 "exit:" %} 9361 ins_encode %{ 9362 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9363 emit_cmpfp_fixup(_masm); 9364 %} 9365 ins_pipe( pipe_slow ); 9366 %} 9367 9368 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9369 predicate(UseSSE>=2); 9370 match(Set cr (CmpD src1 (LoadD src2))); 9371 ins_cost(100); 9372 format %{ "UCOMISD $src1,$src2" %} 9373 ins_encode %{ 9374 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9375 %} 9376 ins_pipe( pipe_slow ); 9377 %} 9378 9379 // Compare into -1,0,1 in XMM 9380 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9381 predicate(UseSSE>=2); 9382 match(Set dst (CmpD3 src1 src2)); 
9383 effect(KILL cr); 9384 ins_cost(255); 9385 format %{ "UCOMISD $src1, $src2\n\t" 9386 "MOV $dst, #-1\n\t" 9387 "JP,s done\n\t" 9388 "JB,s done\n\t" 9389 "SETNE $dst\n\t" 9390 "MOVZB $dst, $dst\n" 9391 "done:" %} 9392 ins_encode %{ 9393 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9394 emit_cmpfp3(_masm, $dst$$Register); 9395 %} 9396 ins_pipe( pipe_slow ); 9397 %} 9398 9399 // Compare into -1,0,1 in XMM and memory 9400 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9401 predicate(UseSSE>=2); 9402 match(Set dst (CmpD3 src1 (LoadD src2))); 9403 effect(KILL cr); 9404 ins_cost(275); 9405 format %{ "UCOMISD $src1, $src2\n\t" 9406 "MOV $dst, #-1\n\t" 9407 "JP,s done\n\t" 9408 "JB,s done\n\t" 9409 "SETNE $dst\n\t" 9410 "MOVZB $dst, $dst\n" 9411 "done:" %} 9412 ins_encode %{ 9413 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9414 emit_cmpfp3(_masm, $dst$$Register); 9415 %} 9416 ins_pipe( pipe_slow ); 9417 %} 9418 9419 9420 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9421 predicate (UseSSE <=1); 9422 match(Set dst (SubD dst src)); 9423 9424 format %{ "FLD $src\n\t" 9425 "DSUBp $dst,ST" %} 9426 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9427 ins_cost(150); 9428 ins_encode( Push_Reg_DPR(src), 9429 OpcP, RegOpc(dst) ); 9430 ins_pipe( fpu_reg_reg ); 9431 %} 9432 9433 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9434 predicate (UseSSE <=1); 9435 match(Set dst (RoundDouble (SubD src1 src2))); 9436 ins_cost(250); 9437 9438 format %{ "FLD $src2\n\t" 9439 "DSUB ST,$src1\n\t" 9440 "FSTP_D $dst\t# D-round" %} 9441 opcode(0xD8, 0x5); 9442 ins_encode( Push_Reg_DPR(src2), 9443 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9444 ins_pipe( fpu_mem_reg_reg ); 9445 %} 9446 9447 9448 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9449 predicate (UseSSE <=1); 9450 match(Set dst (SubD dst (LoadD src))); 9451 ins_cost(150); 9452 9453 format %{ "FLD $src\n\t" 9454 "DSUBp $dst,ST" %} 9455 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9456 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9457 OpcP, RegOpc(dst) ); 9458 ins_pipe( fpu_reg_mem ); 9459 %} 9460 9461 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9462 predicate (UseSSE<=1); 9463 match(Set dst (AbsD src)); 9464 ins_cost(100); 9465 format %{ "FABS" %} 9466 opcode(0xE1, 0xD9); 9467 ins_encode( OpcS, OpcP ); 9468 ins_pipe( fpu_reg_reg ); 9469 %} 9470 9471 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9472 predicate(UseSSE<=1); 9473 match(Set dst (NegD src)); 9474 ins_cost(100); 9475 format %{ "FCHS" %} 9476 opcode(0xE0, 0xD9); 9477 ins_encode( OpcS, OpcP ); 9478 ins_pipe( fpu_reg_reg ); 9479 %} 9480 9481 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9482 predicate(UseSSE<=1); 9483 match(Set dst (AddD dst src)); 9484 format %{ "FLD $src\n\t" 9485 "DADD $dst,ST" %} 9486 size(4); 9487 ins_cost(150); 9488 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9489 ins_encode( Push_Reg_DPR(src), 9490 OpcP, RegOpc(dst) ); 9491 ins_pipe( fpu_reg_reg ); 9492 %} 9493 9494 9495 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9496 predicate(UseSSE<=1); 9497 match(Set dst (RoundDouble (AddD src1 src2))); 9498 ins_cost(250); 9499 9500 format %{ "FLD $src2\n\t" 9501 "DADD ST,$src1\n\t" 9502 "FSTP_D $dst\t# D-round" %} 9503 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9504 ins_encode( Push_Reg_DPR(src2), 9505 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9506 ins_pipe( fpu_mem_reg_reg ); 9507 %} 9508 9509 9510 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9511 predicate(UseSSE<=1); 9512 match(Set dst (AddD dst (LoadD src))); 9513 ins_cost(150); 9514 9515 format %{ "FLD $src\n\t" 9516 "DADDp $dst,ST" %} 9517 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9518 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9519 OpcP, RegOpc(dst) ); 9520 ins_pipe( fpu_reg_mem ); 9521 %} 9522 9523 // add-to-memory 9524 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9525 predicate(UseSSE<=1); 9526 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9527 ins_cost(150); 9528 9529 format %{ "FLD_D $dst\n\t" 9530 "DADD ST,$src\n\t" 9531 "FST_D $dst" %} 9532 opcode(0xDD, 0x0); 9533 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9534 Opcode(0xD8), RegOpc(src), 9535 set_instruction_start, 9536 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9537 ins_pipe( fpu_reg_mem ); 9538 %} 9539 9540 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9541 predicate(UseSSE<=1); 9542 match(Set dst (AddD dst con)); 9543 ins_cost(125); 9544 format %{ "FLD1\n\t" 9545 "DADDp $dst,ST" %} 9546 ins_encode %{ 9547 __ fld1(); 9548 __ faddp($dst$$reg); 9549 %} 9550 ins_pipe(fpu_reg); 9551 %} 9552 9553 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9554 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9555 match(Set dst (AddD dst con)); 9556 ins_cost(200); 9557 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9558 "DADDp $dst,ST" %} 9559 ins_encode %{ 9560 __ fld_d($constantaddress($con)); 9561 __ faddp($dst$$reg); 9562 %} 9563 ins_pipe(fpu_reg_mem); 9564 %} 9565 9566 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9567 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9568 match(Set dst (RoundDouble (AddD src con))); 9569 ins_cost(200); 9570 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9571 "DADD ST,$src\n\t" 9572 "FSTP_D $dst\t# D-round" %} 9573 ins_encode %{ 9574 __ fld_d($constantaddress($con)); 9575 __ fadd($src$$reg); 9576 __ fstp_d(Address(rsp, $dst$$disp)); 9577 %} 9578 ins_pipe(fpu_mem_reg_con); 9579 %} 9580 9581 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9582 predicate(UseSSE<=1); 9583 match(Set dst (MulD dst src)); 9584 format %{ "FLD $src\n\t" 9585 "DMULp $dst,ST" %} 9586 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9587 ins_cost(150); 9588 ins_encode( Push_Reg_DPR(src), 9589 OpcP, RegOpc(dst) ); 9590 ins_pipe( 
fpu_reg_reg ); 9591 %} 9592 9593 // Strict FP instruction biases argument before multiply then 9594 // biases result to avoid double rounding of subnormals. 9595 // 9596 // scale arg1 by multiplying arg1 by 2^(-15360) 9597 // load arg2 9598 // multiply scaled arg1 by arg2 9599 // rescale product by 2^(15360) 9600 // 9601 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9602 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9603 match(Set dst (MulD dst src)); 9604 ins_cost(1); // Select this instruction for all strict FP double multiplies 9605 9606 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9607 "DMULp $dst,ST\n\t" 9608 "FLD $src\n\t" 9609 "DMULp $dst,ST\n\t" 9610 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9611 "DMULp $dst,ST\n\t" %} 9612 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9613 ins_encode( strictfp_bias1(dst), 9614 Push_Reg_DPR(src), 9615 OpcP, RegOpc(dst), 9616 strictfp_bias2(dst) ); 9617 ins_pipe( fpu_reg_reg ); 9618 %} 9619 9620 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9621 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9622 match(Set dst (MulD dst con)); 9623 ins_cost(200); 9624 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9625 "DMULp $dst,ST" %} 9626 ins_encode %{ 9627 __ fld_d($constantaddress($con)); 9628 __ fmulp($dst$$reg); 9629 %} 9630 ins_pipe(fpu_reg_mem); 9631 %} 9632 9633 9634 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9635 predicate( UseSSE<=1 ); 9636 match(Set dst (MulD dst (LoadD src))); 9637 ins_cost(200); 9638 format %{ "FLD_D $src\n\t" 9639 "DMULp $dst,ST" %} 9640 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9641 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9642 OpcP, RegOpc(dst) ); 9643 ins_pipe( fpu_reg_mem ); 9644 %} 9645 9646 // 9647 // Cisc-alternate to reg-reg multiply 9648 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9649 predicate( UseSSE<=1 ); 9650 match(Set dst (MulD src (LoadD mem))); 9651 ins_cost(250); 9652 format %{ "FLD_D $mem\n\t" 9653 "DMUL ST,$src\n\t" 9654 "FSTP_D $dst" %} 9655 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9656 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9657 OpcReg_FPR(src), 9658 Pop_Reg_DPR(dst) ); 9659 ins_pipe( fpu_reg_reg_mem ); 9660 %} 9661 9662 9663 // MACRO3 -- addDPR a mulDPR 9664 // This instruction is a '2-address' instruction in that the result goes 9665 // back to src2. This eliminates a move from the macro; possibly the 9666 // register allocator will have to add it back (and maybe not). 9667 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9668 predicate( UseSSE<=1 ); 9669 match(Set src2 (AddD (MulD src0 src1) src2)); 9670 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9671 "DMUL ST,$src1\n\t" 9672 "DADDp $src2,ST" %} 9673 ins_cost(250); 9674 opcode(0xDD); /* LoadD DD /0 */ 9675 ins_encode( Push_Reg_FPR(src0), 9676 FMul_ST_reg(src1), 9677 FAddP_reg_ST(src2) ); 9678 ins_pipe( fpu_reg_reg_reg ); 9679 %} 9680 9681 9682 // MACRO3 -- subDPR a mulDPR 9683 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9684 predicate( UseSSE<=1 ); 9685 match(Set src2 (SubD (MulD src0 src1) src2)); 9686 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9687 "DMUL ST,$src1\n\t" 9688 "DSUBRp $src2,ST" %} 9689 ins_cost(250); 9690 ins_encode( Push_Reg_FPR(src0), 9691 FMul_ST_reg(src1), 9692 Opcode(0xDE), Opc_plus(0xE0,src2)); 9693 ins_pipe( fpu_reg_reg_reg ); 9694 %} 9695 9696 9697 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9698 predicate( UseSSE<=1 ); 9699 match(Set dst (DivD dst src)); 9700 9701 format %{ "FLD $src\n\t" 9702 "FDIVp $dst,ST" %} 9703 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9704 ins_cost(150); 9705 ins_encode( Push_Reg_DPR(src), 9706 OpcP, RegOpc(dst) ); 9707 ins_pipe( fpu_reg_reg ); 9708 %} 9709 9710 // Strict FP instruction biases argument before division then 9711 // biases 
result, to avoid double rounding of subnormals. 9712 // 9713 // scale dividend by multiplying dividend by 2^(-15360) 9714 // load divisor 9715 // divide scaled dividend by divisor 9716 // rescale quotient by 2^(15360) 9717 // 9718 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9719 predicate (UseSSE<=1); 9720 match(Set dst (DivD dst src)); 9721 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9722 ins_cost(01); 9723 9724 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9725 "DMULp $dst,ST\n\t" 9726 "FLD $src\n\t" 9727 "FDIVp $dst,ST\n\t" 9728 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9729 "DMULp $dst,ST\n\t" %} 9730 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9731 ins_encode( strictfp_bias1(dst), 9732 Push_Reg_DPR(src), 9733 OpcP, RegOpc(dst), 9734 strictfp_bias2(dst) ); 9735 ins_pipe( fpu_reg_reg ); 9736 %} 9737 9738 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9739 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 9740 match(Set dst (RoundDouble (DivD src1 src2))); 9741 9742 format %{ "FLD $src1\n\t" 9743 "FDIV ST,$src2\n\t" 9744 "FSTP_D $dst\t# D-round" %} 9745 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 9746 ins_encode( Push_Reg_DPR(src1), 9747 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); 9748 ins_pipe( fpu_mem_reg_reg ); 9749 %} 9750 9751 9752 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 9753 predicate(UseSSE<=1); 9754 match(Set dst (ModD dst src)); 9755 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 9756 9757 format %{ "DMOD $dst,$src" %} 9758 ins_cost(250); 9759 ins_encode(Push_Reg_Mod_DPR(dst, src), 9760 emitModDPR(), 9761 Push_Result_Mod_DPR(src), 9762 Pop_Reg_DPR(dst)); 9763 ins_pipe( pipe_slow ); 9764 %} 9765 9766 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 9767 predicate(UseSSE>=2); 9768 match(Set dst (ModD src0 src1)); 
9769 effect(KILL rax, KILL cr); 9770 9771 format %{ "SUB ESP,8\t # DMOD\n" 9772 "\tMOVSD [ESP+0],$src1\n" 9773 "\tFLD_D [ESP+0]\n" 9774 "\tMOVSD [ESP+0],$src0\n" 9775 "\tFLD_D [ESP+0]\n" 9776 "loop:\tFPREM\n" 9777 "\tFWAIT\n" 9778 "\tFNSTSW AX\n" 9779 "\tSAHF\n" 9780 "\tJP loop\n" 9781 "\tFSTP_D [ESP+0]\n" 9782 "\tMOVSD $dst,[ESP+0]\n" 9783 "\tADD ESP,8\n" 9784 "\tFSTP ST0\t # Restore FPU Stack" 9785 %} 9786 ins_cost(250); 9787 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9788 ins_pipe( pipe_slow ); 9789 %} 9790 9791 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{ 9792 predicate (UseSSE<=1); 9793 match(Set dst (SinD src)); 9794 ins_cost(1800); 9795 format %{ "DSIN $dst" %} 9796 opcode(0xD9, 0xFE); 9797 ins_encode( OpcP, OpcS ); 9798 ins_pipe( pipe_slow ); 9799 %} 9800 9801 instruct sinD_reg(regD dst, eFlagsReg cr) %{ 9802 predicate (UseSSE>=2); 9803 match(Set dst (SinD dst)); 9804 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9805 ins_cost(1800); 9806 format %{ "DSIN $dst" %} 9807 opcode(0xD9, 0xFE); 9808 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9809 ins_pipe( pipe_slow ); 9810 %} 9811 9812 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{ 9813 predicate (UseSSE<=1); 9814 match(Set dst (CosD src)); 9815 ins_cost(1800); 9816 format %{ "DCOS $dst" %} 9817 opcode(0xD9, 0xFF); 9818 ins_encode( OpcP, OpcS ); 9819 ins_pipe( pipe_slow ); 9820 %} 9821 9822 instruct cosD_reg(regD dst, eFlagsReg cr) %{ 9823 predicate (UseSSE>=2); 9824 match(Set dst (CosD dst)); 9825 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9826 ins_cost(1800); 9827 format %{ "DCOS $dst" %} 9828 opcode(0xD9, 0xFF); 9829 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9830 ins_pipe( pipe_slow ); 9831 %} 9832 9833 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ 9834 predicate (UseSSE<=1); 9835 match(Set dst(TanD src)); 9836 format %{ "DTAN $dst" %} 9837 ins_encode( Opcode(0xD9), Opcode(0xF2), // 
fptan 9838 Opcode(0xDD), Opcode(0xD8)); // fstp st 9839 ins_pipe( pipe_slow ); 9840 %} 9841 9842 instruct tanD_reg(regD dst, eFlagsReg cr) %{ 9843 predicate (UseSSE>=2); 9844 match(Set dst(TanD dst)); 9845 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9846 format %{ "DTAN $dst" %} 9847 ins_encode( Push_SrcD(dst), 9848 Opcode(0xD9), Opcode(0xF2), // fptan 9849 Opcode(0xDD), Opcode(0xD8), // fstp st 9850 Push_ResultD(dst) ); 9851 ins_pipe( pipe_slow ); 9852 %} 9853 9854 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9855 predicate (UseSSE<=1); 9856 match(Set dst(AtanD dst src)); 9857 format %{ "DATA $dst,$src" %} 9858 opcode(0xD9, 0xF3); 9859 ins_encode( Push_Reg_DPR(src), 9860 OpcP, OpcS, RegOpc(dst) ); 9861 ins_pipe( pipe_slow ); 9862 %} 9863 9864 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9865 predicate (UseSSE>=2); 9866 match(Set dst(AtanD dst src)); 9867 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9868 format %{ "DATA $dst,$src" %} 9869 opcode(0xD9, 0xF3); 9870 ins_encode( Push_SrcD(src), 9871 OpcP, OpcS, Push_ResultD(dst) ); 9872 ins_pipe( pipe_slow ); 9873 %} 9874 9875 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9876 predicate (UseSSE<=1); 9877 match(Set dst (SqrtD src)); 9878 format %{ "DSQRT $dst,$src" %} 9879 opcode(0xFA, 0xD9); 9880 ins_encode( Push_Reg_DPR(src), 9881 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9882 ins_pipe( pipe_slow ); 9883 %} 9884 9885 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9886 predicate (UseSSE<=1); 9887 match(Set Y (PowD X Y)); // Raise X to the Yth power 9888 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9889 format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %} 9890 ins_encode %{ 9891 __ subptr(rsp, 8); 9892 __ fld_s($X$$reg - 1); 9893 __ fast_pow(); 9894 __ addptr(rsp, 8); 9895 %} 9896 ins_pipe( pipe_slow ); 9897 %} 9898 9899 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, 
eFlagsReg cr) %{ 9900 predicate (UseSSE>=2); 9901 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 9902 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9903 format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %} 9904 ins_encode %{ 9905 __ subptr(rsp, 8); 9906 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 9907 __ fld_d(Address(rsp, 0)); 9908 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 9909 __ fld_d(Address(rsp, 0)); 9910 __ fast_pow(); 9911 __ fstp_d(Address(rsp, 0)); 9912 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 9913 __ addptr(rsp, 8); 9914 %} 9915 ins_pipe( pipe_slow ); 9916 %} 9917 9918 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ 9919 predicate (UseSSE<=1); 9920 // The source Double operand on FPU stack 9921 match(Set dst (Log10D src)); 9922 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9923 // fxch ; swap ST(0) with ST(1) 9924 // fyl2x ; compute log_10(2) * log_2(x) 9925 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9926 "FXCH \n\t" 9927 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9928 %} 9929 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9930 Opcode(0xD9), Opcode(0xC9), // fxch 9931 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9932 9933 ins_pipe( pipe_slow ); 9934 %} 9935 9936 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ 9937 predicate (UseSSE>=2); 9938 effect(KILL cr); 9939 match(Set dst (Log10D src)); 9940 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9941 // fyl2x ; compute log_10(2) * log_2(x) 9942 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9943 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9944 %} 9945 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9946 Push_SrcD(src), 9947 Opcode(0xD9), Opcode(0xF1), // fyl2x 9948 Push_ResultD(dst)); 9949 9950 ins_pipe( pipe_slow ); 9951 %} 9952 9953 //-------------Float Instructions------------------------------- 9954 // Float Math 9955 9956 // Code for float compare: 9957 // fcompp(); 9958 // fwait(); fnstsw_ax(); 9959 // sahf(); 9960 // movl(dst, 
unordered_result); 9961 // jcc(Assembler::parity, exit); 9962 // movl(dst, less_result); 9963 // jcc(Assembler::below, exit); 9964 // movl(dst, equal_result); 9965 // jcc(Assembler::equal, exit); 9966 // movl(dst, greater_result); 9967 // exit: 9968 9969 // P6 version of float compare, sets condition codes in EFLAGS 9970 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9971 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9972 match(Set cr (CmpF src1 src2)); 9973 effect(KILL rax); 9974 ins_cost(150); 9975 format %{ "FLD $src1\n\t" 9976 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9977 "JNP exit\n\t" 9978 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 9979 "SAHF\n" 9980 "exit:\tNOP // avoid branch to branch" %} 9981 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9982 ins_encode( Push_Reg_DPR(src1), 9983 OpcP, RegOpc(src2), 9984 cmpF_P6_fixup ); 9985 ins_pipe( pipe_slow ); 9986 %} 9987 9988 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 9989 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9990 match(Set cr (CmpF src1 src2)); 9991 ins_cost(100); 9992 format %{ "FLD $src1\n\t" 9993 "FUCOMIP ST,$src2 // P6 instruction" %} 9994 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9995 ins_encode( Push_Reg_DPR(src1), 9996 OpcP, RegOpc(src2)); 9997 ins_pipe( pipe_slow ); 9998 %} 9999 10000 10001 // Compare & branch 10002 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10003 predicate(UseSSE == 0); 10004 match(Set cr (CmpF src1 src2)); 10005 effect(KILL rax); 10006 ins_cost(200); 10007 format %{ "FLD $src1\n\t" 10008 "FCOMp $src2\n\t" 10009 "FNSTSW AX\n\t" 10010 "TEST AX,0x400\n\t" 10011 "JZ,s flags\n\t" 10012 "MOV AH,1\t# unordered treat as LT\n" 10013 "flags:\tSAHF" %} 10014 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10015 ins_encode( Push_Reg_DPR(src1), 10016 OpcP, RegOpc(src2), 10017 fpu_flags); 10018 ins_pipe( pipe_slow ); 10019 %} 10020 10021 // Compare vs zero into -1,0,1 10022 instruct 
cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10023 predicate(UseSSE == 0); 10024 match(Set dst (CmpF3 src1 zero)); 10025 effect(KILL cr, KILL rax); 10026 ins_cost(280); 10027 format %{ "FTSTF $dst,$src1" %} 10028 opcode(0xE4, 0xD9); 10029 ins_encode( Push_Reg_DPR(src1), 10030 OpcS, OpcP, PopFPU, 10031 CmpF_Result(dst)); 10032 ins_pipe( pipe_slow ); 10033 %} 10034 10035 // Compare into -1,0,1 10036 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10037 predicate(UseSSE == 0); 10038 match(Set dst (CmpF3 src1 src2)); 10039 effect(KILL cr, KILL rax); 10040 ins_cost(300); 10041 format %{ "FCMPF $dst,$src1,$src2" %} 10042 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10043 ins_encode( Push_Reg_DPR(src1), 10044 OpcP, RegOpc(src2), 10045 CmpF_Result(dst)); 10046 ins_pipe( pipe_slow ); 10047 %} 10048 10049 // float compare and set condition codes in EFLAGS by XMM regs 10050 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10051 predicate(UseSSE>=1); 10052 match(Set cr (CmpF src1 src2)); 10053 ins_cost(145); 10054 format %{ "UCOMISS $src1,$src2\n\t" 10055 "JNP,s exit\n\t" 10056 "PUSHF\t# saw NaN, set CF\n\t" 10057 "AND [rsp], #0xffffff2b\n\t" 10058 "POPF\n" 10059 "exit:" %} 10060 ins_encode %{ 10061 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10062 emit_cmpfp_fixup(_masm); 10063 %} 10064 ins_pipe( pipe_slow ); 10065 %} 10066 10067 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10068 predicate(UseSSE>=1); 10069 match(Set cr (CmpF src1 src2)); 10070 ins_cost(100); 10071 format %{ "UCOMISS $src1,$src2" %} 10072 ins_encode %{ 10073 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10074 %} 10075 ins_pipe( pipe_slow ); 10076 %} 10077 10078 // float compare and set condition codes in EFLAGS by XMM regs 10079 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10080 predicate(UseSSE>=1); 10081 match(Set cr (CmpF src1 (LoadF src2))); 10082 ins_cost(165); 10083 format %{ 
"UCOMISS $src1,$src2\n\t" 10084 "JNP,s exit\n\t" 10085 "PUSHF\t# saw NaN, set CF\n\t" 10086 "AND [rsp], #0xffffff2b\n\t" 10087 "POPF\n" 10088 "exit:" %} 10089 ins_encode %{ 10090 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10091 emit_cmpfp_fixup(_masm); 10092 %} 10093 ins_pipe( pipe_slow ); 10094 %} 10095 10096 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10097 predicate(UseSSE>=1); 10098 match(Set cr (CmpF src1 (LoadF src2))); 10099 ins_cost(100); 10100 format %{ "UCOMISS $src1,$src2" %} 10101 ins_encode %{ 10102 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10103 %} 10104 ins_pipe( pipe_slow ); 10105 %} 10106 10107 // Compare into -1,0,1 in XMM 10108 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10109 predicate(UseSSE>=1); 10110 match(Set dst (CmpF3 src1 src2)); 10111 effect(KILL cr); 10112 ins_cost(255); 10113 format %{ "UCOMISS $src1, $src2\n\t" 10114 "MOV $dst, #-1\n\t" 10115 "JP,s done\n\t" 10116 "JB,s done\n\t" 10117 "SETNE $dst\n\t" 10118 "MOVZB $dst, $dst\n" 10119 "done:" %} 10120 ins_encode %{ 10121 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10122 emit_cmpfp3(_masm, $dst$$Register); 10123 %} 10124 ins_pipe( pipe_slow ); 10125 %} 10126 10127 // Compare into -1,0,1 in XMM and memory 10128 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10129 predicate(UseSSE>=1); 10130 match(Set dst (CmpF3 src1 (LoadF src2))); 10131 effect(KILL cr); 10132 ins_cost(275); 10133 format %{ "UCOMISS $src1, $src2\n\t" 10134 "MOV $dst, #-1\n\t" 10135 "JP,s done\n\t" 10136 "JB,s done\n\t" 10137 "SETNE $dst\n\t" 10138 "MOVZB $dst, $dst\n" 10139 "done:" %} 10140 ins_encode %{ 10141 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10142 emit_cmpfp3(_masm, $dst$$Register); 10143 %} 10144 ins_pipe( pipe_slow ); 10145 %} 10146 10147 // Spill to obtain 24-bit precision 10148 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10149 predicate(UseSSE==0 && 
            Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 absolute value; regFPR1 constrains operands to the top of the FPU
// stack, matching FABS which operates on ST(0) only.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// x87 negate; like FABS, FCHS works on ST(0) only (regFPR1 operands).
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1,
                            memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
// Add a float constant taken from the constant table.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
// Multiply by a float constant taken from the constant table.
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits.  It is a '2-address'
// instruction in that the result goes back to src2.  This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
// Float remainder via the shared double-precision FPREM helper encodings.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: operands are bounced through the stack so the
// x87 FPREM loop (see format) can compute the remainder, then the
// result is moved back into an XMM register.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all Alpha sorted.  Please keep it that way!

instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
// NOTE(review): single-precision has an 8-bit exponent; the '6-bit'
// wording above looks stale -- confirm before relying on it.
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If the source is not already on top of the FPU stack, load a copy
    // first; otherwise store ST(0) directly.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// F2D when the float lives in XMM (UseSSE==1) but the double result must
// go to the x87 stack: bounce the value through the CPU stack.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 is CVTTSD2SI's "integer indefinite" result: either an
    // overflow/NaN, or a legitimate MIN_VALUE -- the wrapper sorts it out.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x80000000:00000000 is FIST's "integer indefinite" pattern; take the
    // slow wrapper call to distinguish overflow/NaN from a real MIN_VALUE.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 is the "integer indefinite" result: overflow/NaN or a real
    // MIN_VALUE -- d2i_wrapper resolves the ambiguity on the slow path.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Int to double, x87 path (operand comes in on the stack as stackSlotI).
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// I2D via MOVD + CVTDQ2PD, selected by the UseXmmI2D flag.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// Matches ConvI2F whose input is an (AndI x 255), i.e. an unsigned byte:
// the value always fits in 24 bits, so no rounding store is required.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// I2F via MOVD + CVTDQ2PS, selected by the UseXmmI2F flag.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy into both halves, then SAR the high half.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long to int: just take the low half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Raw bit-moves between float and int representations (no conversion).
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t#
MoveI2F_reg_stack" %} 11241 ins_encode %{ 11242 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11243 %} 11244 ins_pipe( ialu_mem_reg ); 11245 %} 11246 11247 11248 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11249 predicate(UseSSE==0); 11250 match(Set dst (MoveI2F src)); 11251 effect(DEF dst, USE src); 11252 11253 ins_cost(125); 11254 format %{ "FLD_S $src\n\t" 11255 "FSTP $dst\t# MoveI2F_stack_reg" %} 11256 opcode(0xD9); /* D9 /0, FLD m32real */ 11257 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11258 Pop_Reg_FPR(dst) ); 11259 ins_pipe( fpu_reg_mem ); 11260 %} 11261 11262 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11263 predicate(UseSSE>=1); 11264 match(Set dst (MoveI2F src)); 11265 effect( DEF dst, USE src ); 11266 11267 ins_cost(95); 11268 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11269 ins_encode %{ 11270 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11271 %} 11272 ins_pipe( pipe_slow ); 11273 %} 11274 11275 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11276 predicate(UseSSE>=2); 11277 match(Set dst (MoveI2F src)); 11278 effect( DEF dst, USE src ); 11279 11280 ins_cost(85); 11281 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11282 ins_encode %{ 11283 __ movdl($dst$$XMMRegister, $src$$Register); 11284 %} 11285 ins_pipe( pipe_slow ); 11286 %} 11287 11288 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11289 match(Set dst (MoveD2L src)); 11290 effect(DEF dst, USE src); 11291 11292 ins_cost(250); 11293 format %{ "MOV $dst.lo,$src\n\t" 11294 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11295 opcode(0x8B, 0x8B); 11296 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11297 ins_pipe( ialu_mem_long_reg ); 11298 %} 11299 11300 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11301 predicate(UseSSE<=1); 11302 match(Set dst (MoveD2L src)); 11303 effect(DEF dst, USE src); 11304 11305 ins_cost(125); 11306 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11307 
ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11308 ins_pipe( fpu_mem_reg ); 11309 %} 11310 11311 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11312 predicate(UseSSE>=2); 11313 match(Set dst (MoveD2L src)); 11314 effect(DEF dst, USE src); 11315 ins_cost(95); 11316 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11317 ins_encode %{ 11318 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11319 %} 11320 ins_pipe( pipe_slow ); 11321 %} 11322 11323 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11324 predicate(UseSSE>=2); 11325 match(Set dst (MoveD2L src)); 11326 effect(DEF dst, USE src, TEMP tmp); 11327 ins_cost(85); 11328 format %{ "MOVD $dst.lo,$src\n\t" 11329 "PSHUFLW $tmp,$src,0x4E\n\t" 11330 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11331 ins_encode %{ 11332 __ movdl($dst$$Register, $src$$XMMRegister); 11333 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11334 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11335 %} 11336 ins_pipe( pipe_slow ); 11337 %} 11338 11339 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11340 match(Set dst (MoveL2D src)); 11341 effect(DEF dst, USE src); 11342 11343 ins_cost(200); 11344 format %{ "MOV $dst,$src.lo\n\t" 11345 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11346 opcode(0x89, 0x89); 11347 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11348 ins_pipe( ialu_mem_long_reg ); 11349 %} 11350 11351 11352 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11353 predicate(UseSSE<=1); 11354 match(Set dst (MoveL2D src)); 11355 effect(DEF dst, USE src); 11356 ins_cost(125); 11357 11358 format %{ "FLD_D $src\n\t" 11359 "FSTP $dst\t# MoveL2D_stack_reg" %} 11360 opcode(0xDD); /* DD /0, FLD m64real */ 11361 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11362 Pop_Reg_DPR(dst) ); 11363 ins_pipe( fpu_reg_mem ); 11364 %} 11365 11366 11367 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11368 predicate(UseSSE>=2 && 
UseXmmLoadAndClearUpper); 11369 match(Set dst (MoveL2D src)); 11370 effect(DEF dst, USE src); 11371 11372 ins_cost(95); 11373 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11374 ins_encode %{ 11375 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11376 %} 11377 ins_pipe( pipe_slow ); 11378 %} 11379 11380 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11381 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11382 match(Set dst (MoveL2D src)); 11383 effect(DEF dst, USE src); 11384 11385 ins_cost(95); 11386 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11387 ins_encode %{ 11388 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11389 %} 11390 ins_pipe( pipe_slow ); 11391 %} 11392 11393 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11394 predicate(UseSSE>=2); 11395 match(Set dst (MoveL2D src)); 11396 effect(TEMP dst, USE src, TEMP tmp); 11397 ins_cost(85); 11398 format %{ "MOVD $dst,$src.lo\n\t" 11399 "MOVD $tmp,$src.hi\n\t" 11400 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11401 ins_encode %{ 11402 __ movdl($dst$$XMMRegister, $src$$Register); 11403 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11404 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11405 %} 11406 ins_pipe( pipe_slow ); 11407 %} 11408 11409 11410 // ======================================================================= 11411 // fast clearing of an array 11412 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11413 predicate(!UseFastStosb); 11414 match(Set dummy (ClearArray cnt base)); 11415 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11416 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11417 "SHL ECX,1\t# Convert doublewords to words\n\t" 11418 "REP STOS\t# store EAX into [EDI++] while ECX--" %} 11419 ins_encode %{ 11420 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11421 %} 11422 ins_pipe( pipe_slow ); 11423 %} 11424 11425 instruct 
rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11426 predicate(UseFastStosb); 11427 match(Set dummy (ClearArray cnt base)); 11428 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11429 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11430 "SHL ECX,3\t# Convert doublewords to bytes\n\t" 11431 "REP STOSB\t# store EAX into [EDI++] while ECX--" %} 11432 ins_encode %{ 11433 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11434 %} 11435 ins_pipe( pipe_slow ); 11436 %} 11437 11438 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11439 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11440 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11441 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11442 11443 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11444 ins_encode %{ 11445 __ string_compare($str1$$Register, $str2$$Register, 11446 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11447 $tmp1$$XMMRegister); 11448 %} 11449 ins_pipe( pipe_slow ); 11450 %} 11451 11452 // fast string equals 11453 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11454 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11455 match(Set result (StrEquals (Binary str1 str2) cnt)); 11456 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11457 11458 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11459 ins_encode %{ 11460 __ char_arrays_equals(false, $str1$$Register, $str2$$Register, 11461 $cnt$$Register, $result$$Register, $tmp3$$Register, 11462 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11463 %} 11464 ins_pipe( pipe_slow ); 11465 %} 11466 11467 // fast search of substring with known size. 
11468 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11469 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11470 predicate(UseSSE42Intrinsics); 11471 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11472 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11473 11474 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11475 ins_encode %{ 11476 int icnt2 = (int)$int_cnt2$$constant; 11477 if (icnt2 >= 8) { 11478 // IndexOf for constant substrings with size >= 8 elements 11479 // which don't need to be loaded through stack. 11480 __ string_indexofC8($str1$$Register, $str2$$Register, 11481 $cnt1$$Register, $cnt2$$Register, 11482 icnt2, $result$$Register, 11483 $vec$$XMMRegister, $tmp$$Register); 11484 } else { 11485 // Small strings are loaded through stack if they cross page boundary. 11486 __ string_indexof($str1$$Register, $str2$$Register, 11487 $cnt1$$Register, $cnt2$$Register, 11488 icnt2, $result$$Register, 11489 $vec$$XMMRegister, $tmp$$Register); 11490 } 11491 %} 11492 ins_pipe( pipe_slow ); 11493 %} 11494 11495 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11496 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11497 predicate(UseSSE42Intrinsics); 11498 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11499 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11500 11501 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11502 ins_encode %{ 11503 __ string_indexof($str1$$Register, $str2$$Register, 11504 $cnt1$$Register, $cnt2$$Register, 11505 (-1), $result$$Register, 11506 $vec$$XMMRegister, $tmp$$Register); 11507 %} 11508 ins_pipe( pipe_slow ); 11509 %} 11510 11511 // fast array equals 11512 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 
11513 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11514 %{ 11515 match(Set result (AryEq ary1 ary2)); 11516 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11517 //ins_cost(300); 11518 11519 format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11520 ins_encode %{ 11521 __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register, 11522 $tmp3$$Register, $result$$Register, $tmp4$$Register, 11523 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11524 %} 11525 ins_pipe( pipe_slow ); 11526 %} 11527 11528 // encode char[] to byte[] in ISO_8859_1 11529 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 11530 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11531 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11532 match(Set result (EncodeISOArray src (Binary dst len))); 11533 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11534 11535 format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 11536 ins_encode %{ 11537 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 11538 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11539 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11540 %} 11541 ins_pipe( pipe_slow ); 11542 %} 11543 11544 11545 //----------Control Flow Instructions------------------------------------------ 11546 // Signed compare Instructions 11547 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ 11548 match(Set cr (CmpI op1 op2)); 11549 effect( DEF cr, USE op1, USE op2 ); 11550 format %{ "CMP $op1,$op2" %} 11551 opcode(0x3B); /* Opcode 3B /r */ 11552 ins_encode( OpcP, RegReg( op1, op2) ); 11553 ins_pipe( ialu_cr_reg_reg ); 11554 %} 11555 11556 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 11557 match(Set cr (CmpI op1 op2)); 11558 effect( DEF cr, USE op1 ); 11559 format %{ "CMP $op1,$op2" 
%} 11560 opcode(0x81,0x07); /* Opcode 81 /7 */ 11561 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 11562 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11563 ins_pipe( ialu_cr_reg_imm ); 11564 %} 11565 11566 // Cisc-spilled version of cmpI_eReg 11567 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 11568 match(Set cr (CmpI op1 (LoadI op2))); 11569 11570 format %{ "CMP $op1,$op2" %} 11571 ins_cost(500); 11572 opcode(0x3B); /* Opcode 3B /r */ 11573 ins_encode( OpcP, RegMem( op1, op2) ); 11574 ins_pipe( ialu_cr_reg_mem ); 11575 %} 11576 11577 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{ 11578 match(Set cr (CmpI src zero)); 11579 effect( DEF cr, USE src ); 11580 11581 format %{ "TEST $src,$src" %} 11582 opcode(0x85); 11583 ins_encode( OpcP, RegReg( src, src ) ); 11584 ins_pipe( ialu_cr_reg_imm ); 11585 %} 11586 11587 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{ 11588 match(Set cr (CmpI (AndI src con) zero)); 11589 11590 format %{ "TEST $src,$con" %} 11591 opcode(0xF7,0x00); 11592 ins_encode( OpcP, RegOpc(src), Con32(con) ); 11593 ins_pipe( ialu_cr_reg_imm ); 11594 %} 11595 11596 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{ 11597 match(Set cr (CmpI (AndI src mem) zero)); 11598 11599 format %{ "TEST $src,$mem" %} 11600 opcode(0x85); 11601 ins_encode( OpcP, RegMem( src, mem ) ); 11602 ins_pipe( ialu_cr_reg_mem ); 11603 %} 11604 11605 // Unsigned compare Instructions; really, same as signed except they 11606 // produce an eFlagsRegU instead of eFlagsReg. 
11607 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ 11608 match(Set cr (CmpU op1 op2)); 11609 11610 format %{ "CMPu $op1,$op2" %} 11611 opcode(0x3B); /* Opcode 3B /r */ 11612 ins_encode( OpcP, RegReg( op1, op2) ); 11613 ins_pipe( ialu_cr_reg_reg ); 11614 %} 11615 11616 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ 11617 match(Set cr (CmpU op1 op2)); 11618 11619 format %{ "CMPu $op1,$op2" %} 11620 opcode(0x81,0x07); /* Opcode 81 /7 */ 11621 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11622 ins_pipe( ialu_cr_reg_imm ); 11623 %} 11624 11625 // // Cisc-spilled version of cmpU_eReg 11626 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ 11627 match(Set cr (CmpU op1 (LoadI op2))); 11628 11629 format %{ "CMPu $op1,$op2" %} 11630 ins_cost(500); 11631 opcode(0x3B); /* Opcode 3B /r */ 11632 ins_encode( OpcP, RegMem( op1, op2) ); 11633 ins_pipe( ialu_cr_reg_mem ); 11634 %} 11635 11636 // // Cisc-spilled version of cmpU_eReg 11637 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ 11638 // match(Set cr (CmpU (LoadI op1) op2)); 11639 // 11640 // format %{ "CMPu $op1,$op2" %} 11641 // ins_cost(500); 11642 // opcode(0x39); /* Opcode 39 /r */ 11643 // ins_encode( OpcP, RegMem( op1, op2) ); 11644 //%} 11645 11646 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{ 11647 match(Set cr (CmpU src zero)); 11648 11649 format %{ "TESTu $src,$src" %} 11650 opcode(0x85); 11651 ins_encode( OpcP, RegReg( src, src ) ); 11652 ins_pipe( ialu_cr_reg_imm ); 11653 %} 11654 11655 // Unsigned pointer compare Instructions 11656 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{ 11657 match(Set cr (CmpP op1 op2)); 11658 11659 format %{ "CMPu $op1,$op2" %} 11660 opcode(0x3B); /* Opcode 3B /r */ 11661 ins_encode( OpcP, RegReg( op1, op2) ); 11662 ins_pipe( ialu_cr_reg_reg ); 11663 %} 11664 11665 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{ 11666 match(Set cr (CmpP op1 op2)); 11667 11668 format %{ 
"CMPu $op1,$op2" %} 11669 opcode(0x81,0x07); /* Opcode 81 /7 */ 11670 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11671 ins_pipe( ialu_cr_reg_imm ); 11672 %} 11673 11674 // // Cisc-spilled version of cmpP_eReg 11675 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{ 11676 match(Set cr (CmpP op1 (LoadP op2))); 11677 11678 format %{ "CMPu $op1,$op2" %} 11679 ins_cost(500); 11680 opcode(0x3B); /* Opcode 3B /r */ 11681 ins_encode( OpcP, RegMem( op1, op2) ); 11682 ins_pipe( ialu_cr_reg_mem ); 11683 %} 11684 11685 // // Cisc-spilled version of cmpP_eReg 11686 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{ 11687 // match(Set cr (CmpP (LoadP op1) op2)); 11688 // 11689 // format %{ "CMPu $op1,$op2" %} 11690 // ins_cost(500); 11691 // opcode(0x39); /* Opcode 39 /r */ 11692 // ins_encode( OpcP, RegMem( op1, op2) ); 11693 //%} 11694 11695 // Compare raw pointer (used in out-of-heap check). 11696 // Only works because non-oop pointers must be raw pointers 11697 // and raw pointers have no anti-dependencies. 11698 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{ 11699 predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none ); 11700 match(Set cr (CmpP op1 (LoadP op2))); 11701 11702 format %{ "CMPu $op1,$op2" %} 11703 opcode(0x3B); /* Opcode 3B /r */ 11704 ins_encode( OpcP, RegMem( op1, op2) ); 11705 ins_pipe( ialu_cr_reg_mem ); 11706 %} 11707 11708 // 11709 // This will generate a signed flags result. This should be ok 11710 // since any compare to a zero should be eq/neq. 11711 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{ 11712 match(Set cr (CmpP src zero)); 11713 11714 format %{ "TEST $src,$src" %} 11715 opcode(0x85); 11716 ins_encode( OpcP, RegReg( src, src ) ); 11717 ins_pipe( ialu_cr_reg_imm ); 11718 %} 11719 11720 // Cisc-spilled version of testP_reg 11721 // This will generate a signed flags result. This should be ok 11722 // since any compare to a zero should be eq/neq. 
11723 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{ 11724 match(Set cr (CmpP (LoadP op) zero)); 11725 11726 format %{ "TEST $op,0xFFFFFFFF" %} 11727 ins_cost(500); 11728 opcode(0xF7); /* Opcode F7 /0 */ 11729 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); 11730 ins_pipe( ialu_cr_reg_imm ); 11731 %} 11732 11733 // Yanked all unsigned pointer compare operations. 11734 // Pointer compares are done with CmpP which is already unsigned. 11735 11736 //----------Max and Min-------------------------------------------------------- 11737 // Min Instructions 11738 //// 11739 // *** Min and Max using the conditional move are slower than the 11740 // *** branch version on a Pentium III. 11741 // // Conditional move for min 11742 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11743 // effect( USE_DEF op2, USE op1, USE cr ); 11744 // format %{ "CMOVlt $op2,$op1\t! min" %} 11745 // opcode(0x4C,0x0F); 11746 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11747 // ins_pipe( pipe_cmov_reg ); 11748 //%} 11749 // 11750 //// Min Register with Register (P6 version) 11751 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11752 // predicate(VM_Version::supports_cmov() ); 11753 // match(Set op2 (MinI op1 op2)); 11754 // ins_cost(200); 11755 // expand %{ 11756 // eFlagsReg cr; 11757 // compI_eReg(cr,op1,op2); 11758 // cmovI_reg_lt(op2,op1,cr); 11759 // %} 11760 //%} 11761 11762 // Min Register with Register (generic version) 11763 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11764 match(Set dst (MinI dst src)); 11765 effect(KILL flags); 11766 ins_cost(300); 11767 11768 format %{ "MIN $dst,$src" %} 11769 opcode(0xCC); 11770 ins_encode( min_enc(dst,src) ); 11771 ins_pipe( pipe_slow ); 11772 %} 11773 11774 // Max Register with Register 11775 // *** Min and Max using the conditional move are slower than the 11776 // *** branch version on a Pentium III. 
11777 // // Conditional move for max 11778 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11779 // effect( USE_DEF op2, USE op1, USE cr ); 11780 // format %{ "CMOVgt $op2,$op1\t! max" %} 11781 // opcode(0x4F,0x0F); 11782 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11783 // ins_pipe( pipe_cmov_reg ); 11784 //%} 11785 // 11786 // // Max Register with Register (P6 version) 11787 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11788 // predicate(VM_Version::supports_cmov() ); 11789 // match(Set op2 (MaxI op1 op2)); 11790 // ins_cost(200); 11791 // expand %{ 11792 // eFlagsReg cr; 11793 // compI_eReg(cr,op1,op2); 11794 // cmovI_reg_gt(op2,op1,cr); 11795 // %} 11796 //%} 11797 11798 // Max Register with Register (generic version) 11799 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11800 match(Set dst (MaxI dst src)); 11801 effect(KILL flags); 11802 ins_cost(300); 11803 11804 format %{ "MAX $dst,$src" %} 11805 opcode(0xCC); 11806 ins_encode( max_enc(dst,src) ); 11807 ins_pipe( pipe_slow ); 11808 %} 11809 11810 // ============================================================================ 11811 // Counted Loop limit node which represents exact final iterator value. 11812 // Note: the resulting value should fit into integer range since 11813 // counted loops have limit check on overflow. 11814 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ 11815 match(Set limit (LoopLimit (Binary init limit) stride)); 11816 effect(TEMP limit_hi, TEMP tmp, KILL flags); 11817 ins_cost(300); 11818 11819 format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} 11820 ins_encode %{ 11821 int strd = (int)$stride$$constant; 11822 assert(strd != 1 && strd != -1, "sanity"); 11823 int m1 = (strd > 0) ? 
1 : -1; 11824 // Convert limit to long (EAX:EDX) 11825 __ cdql(); 11826 // Convert init to long (init:tmp) 11827 __ movl($tmp$$Register, $init$$Register); 11828 __ sarl($tmp$$Register, 31); 11829 // $limit - $init 11830 __ subl($limit$$Register, $init$$Register); 11831 __ sbbl($limit_hi$$Register, $tmp$$Register); 11832 // + ($stride - 1) 11833 if (strd > 0) { 11834 __ addl($limit$$Register, (strd - 1)); 11835 __ adcl($limit_hi$$Register, 0); 11836 __ movl($tmp$$Register, strd); 11837 } else { 11838 __ addl($limit$$Register, (strd + 1)); 11839 __ adcl($limit_hi$$Register, -1); 11840 __ lneg($limit_hi$$Register, $limit$$Register); 11841 __ movl($tmp$$Register, -strd); 11842 } 11843 // signed division: (EAX:EDX) / pos_stride 11844 __ idivl($tmp$$Register); 11845 if (strd < 0) { 11846 // restore sign 11847 __ negl($tmp$$Register); 11848 } 11849 // (EAX) * stride 11850 __ mull($tmp$$Register); 11851 // + init (ignore upper bits) 11852 __ addl($limit$$Register, $init$$Register); 11853 %} 11854 ins_pipe( pipe_slow ); 11855 %} 11856 11857 // ============================================================================ 11858 // Branch Instructions 11859 // Jump Table 11860 instruct jumpXtnd(rRegI switch_val) %{ 11861 match(Jump switch_val); 11862 ins_cost(350); 11863 format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} 11864 ins_encode %{ 11865 // Jump to Address(table_base + switch_reg) 11866 Address index(noreg, $switch_val$$Register, Address::times_1); 11867 __ jump(ArrayAddress($constantaddress, index)); 11868 %} 11869 ins_pipe(pipe_jmp); 11870 %} 11871 11872 // Jump Direct - Label defines a relative address from JMP+1 11873 instruct jmpDir(label labl) %{ 11874 match(Goto); 11875 effect(USE labl); 11876 11877 ins_cost(300); 11878 format %{ "JMP $labl" %} 11879 size(5); 11880 ins_encode %{ 11881 Label* L = $labl$$label; 11882 __ jmp(*L, false); // Always long jump 11883 %} 11884 ins_pipe( pipe_jmp ); 11885 %} 11886 11887 // Jump Direct Conditional - Label defines a
relative address from Jcc+1 11888 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{ 11889 match(If cop cr); 11890 effect(USE labl); 11891 11892 ins_cost(300); 11893 format %{ "J$cop $labl" %} 11894 size(6); 11895 ins_encode %{ 11896 Label* L = $labl$$label; 11897 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 11898 %} 11899 ins_pipe( pipe_jcc ); 11900 %} 11901 11902 // Jump Direct Conditional - Label defines a relative address from Jcc+1 11903 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{ 11904 match(CountedLoopEnd cop cr); 11905 effect(USE labl); 11906 11907 ins_cost(300); 11908 format %{ "J$cop $labl\t# Loop end" %} 11909 size(6); 11910 ins_encode %{ 11911 Label* L = $labl$$label; 11912 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 11913 %} 11914 ins_pipe( pipe_jcc ); 11915 %} 11916 11917 // Jump Direct Conditional - Label defines a relative address from Jcc+1 11918 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 11919 match(CountedLoopEnd cop cmp); 11920 effect(USE labl); 11921 11922 ins_cost(300); 11923 format %{ "J$cop,u $labl\t# Loop end" %} 11924 size(6); 11925 ins_encode %{ 11926 Label* L = $labl$$label; 11927 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 11928 %} 11929 ins_pipe( pipe_jcc ); 11930 %} 11931 11932 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 11933 match(CountedLoopEnd cop cmp); 11934 effect(USE labl); 11935 11936 ins_cost(200); 11937 format %{ "J$cop,u $labl\t# Loop end" %} 11938 size(6); 11939 ins_encode %{ 11940 Label* L = $labl$$label; 11941 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 11942 %} 11943 ins_pipe( pipe_jcc ); 11944 %} 11945 11946 // Jump Direct Conditional - using unsigned comparison 11947 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 11948 match(If cop cmp); 11949 effect(USE labl); 11950 11951 ins_cost(300); 11952 format %{ 
"J$cop,u $labl" %} 11953 size(6); 11954 ins_encode %{ 11955 Label* L = $labl$$label; 11956 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 11957 %} 11958 ins_pipe(pipe_jcc); 11959 %} 11960 11961 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 11962 match(If cop cmp); 11963 effect(USE labl); 11964 11965 ins_cost(200); 11966 format %{ "J$cop,u $labl" %} 11967 size(6); 11968 ins_encode %{ 11969 Label* L = $labl$$label; 11970 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 11971 %} 11972 ins_pipe(pipe_jcc); 11973 %} 11974 11975 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 11976 match(If cop cmp); 11977 effect(USE labl); 11978 11979 ins_cost(200); 11980 format %{ $$template 11981 if ($cop$$cmpcode == Assembler::notEqual) { 11982 $$emit$$"JP,u $labl\n\t" 11983 $$emit$$"J$cop,u $labl" 11984 } else { 11985 $$emit$$"JP,u done\n\t" 11986 $$emit$$"J$cop,u $labl\n\t" 11987 $$emit$$"done:" 11988 } 11989 %} 11990 ins_encode %{ 11991 Label* l = $labl$$label; 11992 if ($cop$$cmpcode == Assembler::notEqual) { 11993 __ jcc(Assembler::parity, *l, false); 11994 __ jcc(Assembler::notEqual, *l, false); 11995 } else if ($cop$$cmpcode == Assembler::equal) { 11996 Label done; 11997 __ jccb(Assembler::parity, done); 11998 __ jcc(Assembler::equal, *l, false); 11999 __ bind(done); 12000 } else { 12001 ShouldNotReachHere(); 12002 } 12003 %} 12004 ins_pipe(pipe_jcc); 12005 %} 12006 12007 // ============================================================================ 12008 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass 12009 // array for an instance of the superklass. Set a hidden internal cache on a 12010 // hit (cache is checked with exposed code in gen_subtype_check()). Return 12011 // NZ for a miss or zero for a hit. The encoding ALSO sets flags. 
// Partial subtype check: scan the secondary-supers array of $sub for $super.
// $result is zero on a hit (and the secondary-super cache is updated),
// non-zero on a miss.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Same scan, but only the flags result is consumed (compare vs. NULL), so
// the XOR that zeroes EDI on a hit can be suppressed.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions replace jumps of a long offset (the default match) with
// jumps of a shorter offset.  They are all tagged with the ins_short_branch
// attribute, which causes the ADLC to suppress their match rules during
// general matching.  Instead, the ADLC generates a conversion method in the
// MachNode which can be used to do in-place replacement of the long variant
// with the shorter variant.  The compiler decides whether a branch can be
// shortened via the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers.  Comparing such values efficiently
// is tricky.  The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code.  The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf.  One fix is to not match both
// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    // High halves decide signed order; low halves are compared unsigned.
    __ xorptr($dst$$Register, $dst$$Register);
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// Note: the UseSSE term is parenthesized with the BoolTest disjunction;
// without the parens, '&&' binds tighter than '||' and the rule could match
// a 'ge' test regardless of the UseSSE level (cf. the integer cmov rules
// above, which parenthesize the disjunction).
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form)
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// Note: the UseSSE term is parenthesized with the BoolTest disjunction;
// without the parens, '&&' binds tighter than '||' and the rule could match
// a 'ne' test regardless of the UseSSE level (cf. the integer cmov rules
// above, which parenthesize the disjunction).
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form)
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// Note: the UseSSE term is parenthesized with the BoolTest disjunction;
// without the parens, '&&' binds tighter than '||' and the rule could match
// a 'gt' test regardless of the UseSSE level (cf. the integer cmov rules
// above, which parenthesize the disjunction).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form)
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// A load (instruction 0) immediately following a store (instruction 1) of
// the same register to the same memory slot is redundant; keep only the
// store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.