//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
// (Here it is the hardware x86 register number, e.g. EAX = 0, ECX = 1.)

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
// FPR0 is the x87 top-of-stack; it is never handed out by the allocator,
// so its VMReg slots are marked Bad.  The L/H suffixes name the low and
// high 32-bit halves that together form a double-precision pair (see the
// fp_dbl_* register classes below).
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI,   ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs.  Pairs are listed low-half first and
// follow the Long pairings named in the alloc_class comment above.
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
// Relocation formats used when emitting 32-bit immediates/displacements.
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

// Shorthand used by the MacroAssembler-based emit helpers below.
#define __ _masm.
// How to find the high register of a Long pair, given the low register
// NOTE(review): the +2 reflects the spacing between the low and high
// halves of a pair in the register encoding — confirm against the
// alloc_class ordering in the register block above.
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
//
// Rounds 'adr' down to a 16-byte boundary, stores the 128-bit value
// {lo, hi} there, and returns the aligned address.  Callers must pass a
// buffer with at least 16 bytes of slack (see fp_signmask_pool below).
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
//
// Number of bytes of state-reset code emitted immediately before a call:
// a 6-byte FLDCW when the method runs in 24-bit FP precision mode, and a
// 3-byte vzeroupper when wide (>16 byte) vectors are in use with AVX <= 2.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    if(UseAVX <= 2) {
      size += 3; // vzeroupper
    }
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All code sequence; recorded elsewhere at
// emission time (-1 until then), and asserted below before use.
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
// Returns the number of filler bytes needed before this node.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1;                      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5;                      // skip MOV instruction
  current_offset += 1;                      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModRM (or SIB) byte assembled from the 2-bit mod/scale field f1,
// the 3-bit reg/index field f2 and the 3-bit r/m/base field f3.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a condition code into a base opcode.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Debug-only sanity check: an embedded oop immediate must be a real oop
  // and must not be scavengable unless scavenge roots in code are allowed.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store.
// Emits opcode + ModRM/SIB addressing [ESP+disp], choosing the 8-bit
// displacement form when disp fits in a signed byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
//
// Emit the ModRM/SIB/displacement bytes for a reg,[base+index*scale+disp]
// memory operand.  index == 0x4 means "no index" (0x4 is ESP's encoding,
// which the SIB format reserves); base == -1 requests an absolute
// (disp32-only) address.  A 32-bit displacement is always used when a
// relocation (disp_reloc) must be attached to it.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Emit a 32-bit reg-to-reg MOV (0x8B /r); a self-move emits nothing.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Patch up EFLAGS after a comiss/ucomiss so that an unordered (NaN)
// comparison reads as 'less than' (only runs when PF is set).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst': -1 for less-than or
// unordered (parity), 0 for equal, 1 for greater-than.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the method prolog for -XX:+PrintOptoAssembly; must mirror
// the code generated by MachPrologNode::emit() below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, [ESP + #%d]\t# Save the caller's SP into EBP", (framesize + wordSize));
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  // verified_entry emits the whole prolog: optional stack bang, EBP save,
  // frame allocation and optional 24-bit FPU control word load.
  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the method epilog; must mirror MachEpilogNode::emit() below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    MacroAssembler masm(&cbuf);
    masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (do_polling() && C->is_method_compilation()) {
    // Safepoint return poll: TEST EAX, [polling_page] with a
    // poll_return relocation so the VM can identify the poll site.
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // These byte counts must stay in sync with the encodings produced by
  // MachEpilogNode::emit() above.
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;                   // 32-bit-immediate ADD ESP
  } else {
    size += framesize ? 3 : 0;   // 8-bit-immediate ADD ESP, or nothing
  }
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Coarse register classes used by the spill-copy logic below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };

// Map an OptoReg to its coarse register class.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Triple-duty spill helper: with a non-NULL cbuf it emits the [ESP+offset]
// load/store; with a NULL cbuf and !do_size it prints the disassembly text;
// in every case it returns the accumulated encoding size in bytes.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else {                      // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
// Spill/restore between an XMM register and an [ESP+offset] stack slot.
// A register pair (reg_lo+1 == reg_hi) denotes a 64-bit double move
// (movdbl), otherwise a 32-bit float move (movflt).  Returns accumulated
// encoding size; the EVEX path must mirror the assembler's disp8*N
// displacement compression.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // With EVEX (UseAVX > 2) the displacement may be compressed to one byte
  // (disp8*N); ask the assembler whether this offset compresses.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (movdbl for an adjacent pair, movflt otherwise).
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// General-purpose register to XMM register copy via MOVD.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  // 6 bytes with EVEX prefix, otherwise 4.
  return (UseAVX> 2) ? 6 : 4;
}


// XMM register to general-purpose register copy via MOVD.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  // 6 bytes with EVEX prefix, otherwise 4.
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register copy: MOV r32,r32 (opcode 0x8B + ModRM).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(cbuf ? *cbuf : *cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 FP register to an [ESP+offset] slot.  If the value is not
// already on the top of the FP stack (FPR1L), an FLD is emitted first and a
// popping FSTP is used; otherwise a non-popping FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // Register-number constants are reused here purely for their encodings in
  // the ModRM reg field: EBX_num encodes 3 (FSTP), EDX_num encodes 2 (FST).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
950 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 951 int src_hi, int dst_hi, uint ireg, outputStream* st); 952 953 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 954 int stack_offset, int reg, uint ireg, outputStream* st); 955 956 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 957 int dst_offset, uint ireg, outputStream* st) { 958 int calc_size = 0; 959 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 960 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 961 switch (ireg) { 962 case Op_VecS: 963 calc_size = 3+src_offset_size + 3+dst_offset_size; 964 break; 965 case Op_VecD: 966 calc_size = 3+src_offset_size + 3+dst_offset_size; 967 src_offset += 4; 968 dst_offset += 4; 969 src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 970 dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 971 calc_size += 3+src_offset_size + 3+dst_offset_size; 972 break; 973 case Op_VecX: 974 case Op_VecY: 975 case Op_VecZ: 976 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 977 break; 978 default: 979 ShouldNotReachHere(); 980 } 981 if (cbuf) { 982 MacroAssembler _masm(cbuf); 983 int offset = __ offset(); 984 switch (ireg) { 985 case Op_VecS: 986 __ pushl(Address(rsp, src_offset)); 987 __ popl (Address(rsp, dst_offset)); 988 break; 989 case Op_VecD: 990 __ pushl(Address(rsp, src_offset)); 991 __ popl (Address(rsp, dst_offset)); 992 __ pushl(Address(rsp, src_offset+4)); 993 __ popl (Address(rsp, dst_offset+4)); 994 break; 995 case Op_VecX: 996 __ movdqu(Address(rsp, -16), xmm0); 997 __ movdqu(xmm0, Address(rsp, src_offset)); 998 __ movdqu(Address(rsp, dst_offset), xmm0); 999 __ movdqu(xmm0, Address(rsp, -16)); 1000 break; 1001 case Op_VecY: 1002 __ vmovdqu(Address(rsp, -32), xmm0); 1003 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1004 __ vmovdqu(Address(rsp, dst_offset), xmm0); 1005 __ 
vmovdqu(xmm0, Address(rsp, -32)); 1006 case Op_VecZ: 1007 __ evmovdqu(Address(rsp, -64), xmm0, 2); 1008 __ evmovdqu(xmm0, Address(rsp, src_offset), 2); 1009 __ evmovdqu(Address(rsp, dst_offset), xmm0, 2); 1010 __ evmovdqu(xmm0, Address(rsp, -64), 2); 1011 break; 1012 default: 1013 ShouldNotReachHere(); 1014 } 1015 int size = __ offset() - offset; 1016 assert(size == calc_size, "incorrect size calculattion"); 1017 return size; 1018 #ifndef PRODUCT 1019 } else if (!do_size) { 1020 switch (ireg) { 1021 case Op_VecS: 1022 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1023 "popl [rsp + #%d]", 1024 src_offset, dst_offset); 1025 break; 1026 case Op_VecD: 1027 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1028 "popq [rsp + #%d]\n\t" 1029 "pushl [rsp + #%d]\n\t" 1030 "popq [rsp + #%d]", 1031 src_offset, dst_offset, src_offset+4, dst_offset+4); 1032 break; 1033 case Op_VecX: 1034 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1035 "movdqu xmm0, [rsp + #%d]\n\t" 1036 "movdqu [rsp + #%d], xmm0\n\t" 1037 "movdqu xmm0, [rsp - #16]", 1038 src_offset, dst_offset); 1039 break; 1040 case Op_VecY: 1041 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1042 "vmovdqu xmm0, [rsp + #%d]\n\t" 1043 "vmovdqu [rsp + #%d], xmm0\n\t" 1044 "vmovdqu xmm0, [rsp - #32]", 1045 src_offset, dst_offset); 1046 case Op_VecZ: 1047 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1048 "vmovdqu xmm0, [rsp + #%d]\n\t" 1049 "vmovdqu [rsp + #%d], xmm0\n\t" 1050 "vmovdqu xmm0, [rsp - #64]", 1051 src_offset, dst_offset); 1052 break; 1053 default: 1054 ShouldNotReachHere(); 1055 } 1056 #endif 1057 } 1058 return calc_size; 1059 } 1060 1061 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1062 // Get registers to move 1063 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1064 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1065 OptoReg::Name dst_second = 
ra_->get_reg_second(this ); 1066 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1067 1068 enum RC src_second_rc = rc_class(src_second); 1069 enum RC src_first_rc = rc_class(src_first); 1070 enum RC dst_second_rc = rc_class(dst_second); 1071 enum RC dst_first_rc = rc_class(dst_first); 1072 1073 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1074 1075 // Generate spill code! 1076 int size = 0; 1077 1078 if( src_first == dst_first && src_second == dst_second ) 1079 return size; // Self copy, no move 1080 1081 if (bottom_type()->isa_vect() != NULL) { 1082 uint ireg = ideal_reg(); 1083 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1084 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1085 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1086 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1087 // mem -> mem 1088 int src_offset = ra_->reg2offset(src_first); 1089 int dst_offset = ra_->reg2offset(dst_first); 1090 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1091 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1092 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1093 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1094 int stack_offset = ra_->reg2offset(dst_first); 1095 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1096 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1097 int stack_offset = ra_->reg2offset(src_first); 1098 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1099 } else { 1100 ShouldNotReachHere(); 1101 } 1102 } 1103 1104 // -------------------------------------- 1105 // Check for mem-mem move. push/pop to move. 
1106 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1107 if( src_second == dst_first ) { // overlapping stack copy ranges 1108 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1109 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1110 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1111 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1112 } 1113 // move low bits 1114 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1115 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1116 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1117 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1118 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1119 } 1120 return size; 1121 } 1122 1123 // -------------------------------------- 1124 // Check for integer reg-reg copy 1125 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1126 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1127 1128 // Check for integer store 1129 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1130 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1131 1132 // Check for integer load 1133 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1134 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1135 1136 // Check for integer reg-xmm reg copy 1137 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1138 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1139 "no 64 bit integer-float reg moves" ); 1140 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1141 } 1142 // -------------------------------------- 1143 // Check for float reg-reg copy 1144 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1145 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1146 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1147 if( cbuf ) { 1148 1149 // Note the mucking with the register encode to compensate for the 0/1 1150 // indexing issue mentioned in a comment in the reg_def sections 1151 // for FPR registers many lines above here. 1152 1153 if( src_first != FPR1L_num ) { 1154 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1155 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1156 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1157 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1158 } else { 1159 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1160 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1161 } 1162 #ifndef PRODUCT 1163 } else if( !do_size ) { 1164 if( size != 0 ) st->print("\n\t"); 1165 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1166 else st->print( "FST %s", Matcher::regName[dst_first]); 1167 #endif 1168 } 1169 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1170 } 1171 1172 // Check for float store 1173 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1174 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1175 } 1176 1177 // Check for float load 1178 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1179 int offset = ra_->reg2offset(src_first); 1180 const char *op_str; 1181 int op; 1182 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1183 op_str = "FLD_D"; 1184 op = 0xDD; 1185 } else { // 32-bit load 1186 op_str = "FLD_S"; 1187 op = 0xD9; 1188 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1189 } 1190 if( cbuf ) { 1191 emit_opcode (*cbuf, op ); 1192 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1193 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1194 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1195 #ifndef PRODUCT 1196 } else if( !do_size ) { 1197 if( size != 0 ) st->print("\n\t"); 1198 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1199 #endif 1200 } 1201 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1202 return size + 3+offset_size+2; 1203 } 1204 1205 // Check for xmm reg-reg copy 1206 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1207 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1208 (src_first+1 == src_second && dst_first+1 == dst_second), 1209 "no non-adjacent float-moves" ); 1210 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1211 } 1212 1213 // Check for xmm reg-integer reg copy 1214 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1215 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1216 "no 64 bit float-integer reg moves" ); 1217 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1218 } 1219 1220 // Check for xmm store 1221 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1222 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1223 } 1224 1225 // Check for float xmm load 1226 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1227 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1228 } 1229 1230 // Copy from float reg to xmm reg 1231 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1232 
// copy to the top of stack from floating point reg 1233 // and use LEA to preserve flags 1234 if( cbuf ) { 1235 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1236 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1237 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1238 emit_d8(*cbuf,0xF8); 1239 #ifndef PRODUCT 1240 } else if( !do_size ) { 1241 if( size != 0 ) st->print("\n\t"); 1242 st->print("LEA ESP,[ESP-8]"); 1243 #endif 1244 } 1245 size += 4; 1246 1247 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1248 1249 // Copy from the temp memory to the xmm reg. 1250 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1251 1252 if( cbuf ) { 1253 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1254 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1255 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1256 emit_d8(*cbuf,0x08); 1257 #ifndef PRODUCT 1258 } else if( !do_size ) { 1259 if( size != 0 ) st->print("\n\t"); 1260 st->print("LEA ESP,[ESP+8]"); 1261 #endif 1262 } 1263 size += 4; 1264 return size; 1265 } 1266 1267 assert( size > 0, "missed a case" ); 1268 1269 // -------------------------------------------------------------------- 1270 // Check for second bits still needing moving. 
1271 if( src_second == dst_second ) 1272 return size; // Self copy; no move 1273 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1274 1275 // Check for second word int-int move 1276 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1277 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1278 1279 // Check for second word integer store 1280 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1281 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1282 1283 // Check for second word integer load 1284 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1285 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1286 1287 1288 Unimplemented(); 1289 return 0; // Mute compiler 1290 } 1291 1292 #ifndef PRODUCT 1293 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1294 implementation( NULL, ra_, false, st ); 1295 } 1296 #endif 1297 1298 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1299 implementation( &cbuf, ra_, false, NULL ); 1300 } 1301 1302 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1303 return implementation( NULL, ra_, true, NULL ); 1304 } 1305 1306 1307 //============================================================================= 1308 #ifndef PRODUCT 1309 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1310 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1311 int reg = ra_->get_reg_first(this); 1312 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1313 } 1314 #endif 1315 1316 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1317 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1318 int reg = ra_->get_encode(this); 1319 if( offset >= 128 ) { 1320 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1321 emit_rm(cbuf, 0x2, reg, 
0x04); 1322 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1323 emit_d32(cbuf, offset); 1324 } 1325 else { 1326 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1327 emit_rm(cbuf, 0x1, reg, 0x04); 1328 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1329 emit_d8(cbuf, offset); 1330 } 1331 } 1332 1333 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1334 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1335 if( offset >= 128 ) { 1336 return 7; 1337 } 1338 else { 1339 return 4; 1340 } 1341 } 1342 1343 //============================================================================= 1344 #ifndef PRODUCT 1345 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1346 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1347 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1348 st->print_cr("\tNOP"); 1349 st->print_cr("\tNOP"); 1350 if( !OptoBreakpoint ) 1351 st->print_cr("\tNOP"); 1352 } 1353 #endif 1354 1355 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1356 MacroAssembler masm(&cbuf); 1357 #ifdef ASSERT 1358 uint insts_size = cbuf.insts_size(); 1359 #endif 1360 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1361 masm.jump_cc(Assembler::notEqual, 1362 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1363 /* WARNING these NOPs are critical so that verified entry point is properly 1364 aligned for patching by NativeJump::patch_verified_entry() */ 1365 int nops_cnt = 2; 1366 if( !OptoBreakpoint ) // Leave space for int3 1367 nops_cnt += 1; 1368 masm.nop(nops_cnt); 1369 1370 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1371 } 1372 1373 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1374 return OptoBreakpoint ? 
11 : 12; 1375 } 1376 1377 1378 //============================================================================= 1379 1380 int Matcher::regnum_to_fpu_offset(int regnum) { 1381 return regnum - 32; // The FP registers are in the second chunk 1382 } 1383 1384 // This is UltraSparc specific, true just means we have fast l2f conversion 1385 const bool Matcher::convL2FSupported(void) { 1386 return true; 1387 } 1388 1389 // Is this branch offset short enough that a short branch can be used? 1390 // 1391 // NOTE: If the platform does not provide any short branch variants, then 1392 // this method should return false for offset 0. 1393 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1394 // The passed offset is relative to address of the branch. 1395 // On 86 a branch displacement is calculated relative to address 1396 // of a next instruction. 1397 offset -= br_size; 1398 1399 // the short version of jmpConUCF2 contains multiple branches, 1400 // making the reach slightly less 1401 if (rule == jmpConUCF2_rule) 1402 return (-126 <= offset && offset <= 125); 1403 return (-128 <= offset && offset <= 127); 1404 } 1405 1406 const bool Matcher::isSimpleConstant64(jlong value) { 1407 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1408 return false; 1409 } 1410 1411 // The ecx parameter to rep stos for the ClearArray node is in dwords. 1412 const bool Matcher::init_array_count_is_in_bytes = false; 1413 1414 // Threshold size for cleararray. 1415 const int Matcher::init_array_short_size = 8 * BytesPerLong; 1416 1417 // Needs 2 CMOV's for longs. 1418 const int Matcher::long_cmove_cost() { return 1; } 1419 1420 // No CMOVF/CMOVD with SSE/SSE2 1421 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1422 1423 // Does the CPU require late expand (see block.cpp for description of late expand)? 
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Compressed-oop addressing is not applicable here; this must never be
// called on this platform (enforced by ShouldNotCallThis).
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

// Same as above for compressed klass pointers: must never be called here.
bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Rewrite the idx-th input's memory operand of 'node' into its
// *_win95_safe variant so an implicit null check traps safely
// (replaces the operand object in node->_opnds in place).
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped = node->oper_input_base();   // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Walk the operand list until the operand owning input edge 'idx' is found.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                      // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise
// here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  // XMM argument registers are only live as args when SSE is enabled.
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

// A register is spillable as an argument exactly when it can carry one.
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than code which uses multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL; no divmodL node on this platform.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL; no divmodL node on this platform.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// No SP save/restore register is needed around method-handle invokes here.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// Recognizes (AndL x con) with a high-half-zero constant mask, and a
// ConL whose high half is zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix (selects 16-bit operand size)
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // Emit a mod=11 (register-direct) ModRM byte for a two-register form
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Emit an ADLC-supplied opcode constant followed by a register-direct ModRM byte
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32, 0 using the short B8+rd immediate form
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
    emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg:  divisor                          -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80    cmp         rax,80000000h
    // 0F 85 0B 00 00 00    jne         normal_case
    // 33 D2                xor         rdx,edx
    // 83 F9 FF             cmp         rcx,0FFh
    // 0F 84 03 00 00 00    je          done
    //                  normal_case:
    // 99                   cdq
    // F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops: opcode byte carries the register
  // number in its low bits (e.g. 0x50+rd for PUSH r32)
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  // Like OpcSE but also emits the register-direct ModRM byte with the
  // secondary opcode in the reg/opcode field
  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02); }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Immediate-op on the LOW 32-bit half of a long register pair
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // Immediate-op on the HIGH 32-bit half of a long register pair;
  // uses the tertiary opcode (e.g. ADC/SBB partner of the low-half op)
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // Secondary opcode with register folded into its low bits (BSWAP is 0F C8+rd)
  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit value held in a register pair: bswap each half,
  // then exchange the halves
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  // CMOVcc: primary opcode byte then condition code folded into secondary
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // FCMOVcc to/from x87 register (DA/DB family encodings)
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  // Slow-path subtype check; $primary selects whether EDI is zeroed on success
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  // Empty the x87 FPU stack (or just verify it is empty in SSE2+ mode).
  // Records/checks its own emitted size so callers can rely on it being fixed.
  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  // Direct CALL into the runtime, plus SSE2-mode fixup that moves (or drops)
  // the x87 return value into xmm0 as the compiled calling convention expects
  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}


  // State resets required before a call: restore standard FPU control word
  // if this method runs in 24-bit precision mode, and vzeroupper if wide
  // vectors are in use
  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if (ra_->C->in_24_bit_fp_mode()) {
      MacroAssembler _masm(&cbuf);
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    if (ra_->C->max_vector_size() > 16) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      MacroAssembler _masm(&cbuf);
      __ vzeroupper();
    }
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    if (!_method) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(), RELOC_IMM32 );
    } else if (_optimized_virtual) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
    } else {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     static_call_Relocation::spec(), RELOC_IMM32 );
    }
    if (_method) {  // Emit stub for static call.
      CompiledStaticCall::emit_to_interp_stub(cbuf);
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method);
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);             // Displacement

  %}

  //   Following encoding is no longer used, but may be restored if calling
  //   convention changes significantly.
  //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //     // int ic_reg     = Matcher::inline_cache_reg();
  //     // int ic_encode  = Matcher::_regEncode[ic_reg];
  //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
  //     // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
  //     // // so we load it immediately before the call
  //     // emit_opcode(cbuf, 0x8B);  // MOV    imo_reg,ic_reg  # method_oop
  //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode );  // R/M byte
  //
  //     // xor rbp,ebp
  //     emit_opcode(cbuf, 0x33);
  //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //     // CALL to interpreter.
  //     cbuf.set_insts_mark();
  //     $$$emit8$primary;
  //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
  //   %}

  // Shift-by-immediate: opcode, ModRM with secondary opcode, 8-bit count
  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load low 32 bits of a long immediate; uses XOR when the half is zero
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load high 32 bits of a long immediate into the pair's high register
  // (dst_enc + 2 — NOTE(review): assumes the pair's high half is encoded
  // two register numbers above the low half; confirm against the file's
  // register definitions)
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // Same as RegReg_Lo/Hi but without emitting any opcode byte
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  // ModRM mod=00 r/m=101 selects disp32-only (absolute) addressing
  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // Emit the LOCK prefix only on multiprocessor systems
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  // CMPXCHG8B [ESI]: the hardware wants the new value in ECX:EBX, but our
  // register encoding has the halves swapped, so exchange EBX/ECX around
  // the instruction
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // Locked 32-bit CMPXCHG [ESI]
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize ZF as a boolean: res = (ZF set) ? 1 : 0.  Uses full MOV
  // immediates (not SETcc) so the flags are left untouched; the JNE skips
  // the 5-byte "MOV res,1".
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  // Emit a full ModRM/SIB/displacement memory reference for a register operand
  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // Same as RegMem, but addresses the HIGH half of a long: high register of
  // the pair, memory offset bumped by 4
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Long shift by a 1..31 immediate: SHLD/SHRD ($tertiary; 0xA4 is SHLD,
  // which selects the left-shift operand order) followed by a plain
  // shift of the other half
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by 32..63: copy hi->lo, shift lo by
  // (cnt-32), then SAR hi by 31 to sign-fill it
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical shift (left or right, chosen by $secondary) of a long by 32..63:
  // move one half into the other, shift by (cnt-32), zero the vacated half
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    // disp_for_half selects which 32-bit half of the double (0 or 4)
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  // Same as RMopc_Mem_no_oop but preserves any relocation on the displacement
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // LEA dst, [src0 + src1]: base register plus constant displacement only
  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // dst = min(dst,src): CMP then a short JL (0x7C) over the MOV
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst,src): CMP then a short JG (0x7F) over the MOV
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // Two's-complement negate: F7 /3
  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  // SETL on the byte register: 0F 9C
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free conditional add: p -= q; tmp = borrow mask (SBB tmp,tmp
  // yields all-ones iff p<q); p += (tmp & y).  Net effect of cadd_cmpLT.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Variable left shift of a long (shift count in ECX); handles counts >= 32
  // by moving lo->hi and clearing lo before the SHLD/SHL pair
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Variable logical right shift of a long (count in ECX); counts >= 32
  // move hi->lo and clear hi before the SHRD/SHR pair
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Variable arithmetic right shift of a long (count in ECX); counts >= 32
  // move hi->lo and sign-fill hi (SAR hi,31) before the SHRD/SAR pair
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Multiply dst by a stub-provided scaling constant (strictfp subnormal
  // bias); bias1 and bias2 differ only in which constant they load
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;                      // FST (no pop) if src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // we pushed a copy, so FSTP it
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  // Load dst as TOS; if src is not already FPR1, rotate it into FPR1
  // via fincstp / FXCH / fdecstp so a following two-operand op sees it
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Bounce two XMM doubles through a stack temp onto the x87 stack
  // (src1 pushed first, so src0 ends up on top)
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Float flavor of Push_ModD_encoding
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Move x87 TOS (double) into an XMM register via the stack temp, then
  // release the temp
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Float flavor; d8 is the number of bytes of stack temp to release
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Push one XMM double onto the x87 stack via a fresh stack temp
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Like Push_SrcD but reuses an already-reserved stack temp at [rsp]
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Undo the register rotation performed by Push_Reg_Mod_DPR
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy FPU status to EFLAGS, then short-JNP over the next 5 bytes when
  // the comparison was ordered (parity clear)
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  // Loop FPREM until the C2 flag (reported as parity after sahf) clears,
  // i.e. until the partial remainder is complete
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // Translate FPU compare status into integer flags, forcing the
  // unordered (NaN) case to look like "less than" by setting CF
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  // fnstsw_ax();
  // sahf();
  // movl(dst, nan_result);
  // jcc(Assembler::parity, exit);
  // movl(dst, less_result);
  // jcc(Assembler::below, exit);
  // movl(dst, equal_result);
  // jcc(Assembler::equal, exit);
  // movl(dst, greater_result);

  // less_result     =  1;
  // greater_result  = -1;
  // equal_result    = 0;
  // nan_result      = -1;

  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
2699 emit_d8 ( cbuf, 0x0C ); 2700 // movl(dst, equal_result); 2701 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2702 emit_d32( cbuf, 0 ); 2703 // jcc(Assembler::equal, exit); 2704 emit_opcode( cbuf, 0x74 ); 2705 emit_d8 ( cbuf, 0x05 ); 2706 // movl(dst, greater_result); 2707 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2708 emit_d32( cbuf, 1 ); 2709 %} 2710 2711 2712 // Compare the longs and set flags 2713 // BROKEN! Do Not use as-is 2714 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2715 // CMP $src1.hi,$src2.hi 2716 emit_opcode( cbuf, 0x3B ); 2717 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2718 // JNE,s done 2719 emit_opcode(cbuf,0x75); 2720 emit_d8(cbuf, 2 ); 2721 // CMP $src1.lo,$src2.lo 2722 emit_opcode( cbuf, 0x3B ); 2723 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2724 // done: 2725 %} 2726 2727 enc_class convert_int_long( regL dst, rRegI src ) %{ 2728 // mov $dst.lo,$src 2729 int dst_encoding = $dst$$reg; 2730 int src_encoding = $src$$reg; 2731 encode_Copy( cbuf, dst_encoding , src_encoding ); 2732 // mov $dst.hi,$src 2733 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2734 // sar $dst.hi,31 2735 emit_opcode( cbuf, 0xC1 ); 2736 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2737 emit_d8(cbuf, 0x1F ); 2738 %} 2739 2740 enc_class convert_long_double( eRegL src ) %{ 2741 // push $src.hi 2742 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2743 // push $src.lo 2744 emit_opcode(cbuf, 0x50+$src$$reg ); 2745 // fild 64-bits at [SP] 2746 emit_opcode(cbuf,0xdf); 2747 emit_d8(cbuf, 0x6C); 2748 emit_d8(cbuf, 0x24); 2749 emit_d8(cbuf, 0x00); 2750 // pop stack 2751 emit_opcode(cbuf, 0x83); // add SP, #8 2752 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2753 emit_d8(cbuf, 0x8); 2754 %} 2755 2756 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2757 // IMUL EDX:EAX,$src1 2758 emit_opcode( cbuf, 0xF7 ); 2759 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2760 // SAR 
EDX,$cnt-32 2761 int shift_count = ((int)$cnt$$constant) - 32; 2762 if (shift_count > 0) { 2763 emit_opcode(cbuf, 0xC1); 2764 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2765 emit_d8(cbuf, shift_count); 2766 } 2767 %} 2768 2769 // this version doesn't have add sp, 8 2770 enc_class convert_long_double2( eRegL src ) %{ 2771 // push $src.hi 2772 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2773 // push $src.lo 2774 emit_opcode(cbuf, 0x50+$src$$reg ); 2775 // fild 64-bits at [SP] 2776 emit_opcode(cbuf,0xdf); 2777 emit_d8(cbuf, 0x6C); 2778 emit_d8(cbuf, 0x24); 2779 emit_d8(cbuf, 0x00); 2780 %} 2781 2782 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2783 // Basic idea: long = (long)int * (long)int 2784 // IMUL EDX:EAX, src 2785 emit_opcode( cbuf, 0xF7 ); 2786 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2787 %} 2788 2789 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2790 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2791 // MUL EDX:EAX, src 2792 emit_opcode( cbuf, 0xF7 ); 2793 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2794 %} 2795 2796 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2797 // Basic idea: lo(result) = lo(x_lo * y_lo) 2798 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2799 // MOV $tmp,$src.lo 2800 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2801 // IMUL $tmp,EDX 2802 emit_opcode( cbuf, 0x0F ); 2803 emit_opcode( cbuf, 0xAF ); 2804 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2805 // MOV EDX,$src.hi 2806 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2807 // IMUL EDX,EAX 2808 emit_opcode( cbuf, 0x0F ); 2809 emit_opcode( cbuf, 0xAF ); 2810 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2811 // ADD $tmp,EDX 2812 emit_opcode( cbuf, 0x03 ); 2813 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2814 // MUL EDX:EAX,$src.lo 2815 emit_opcode( cbuf, 0xF7 ); 2816 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2817 // ADD EDX,ESI 2818 emit_opcode( 
cbuf, 0x03 ); 2819 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2820 %} 2821 2822 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2823 // Basic idea: lo(result) = lo(src * y_lo) 2824 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2825 // IMUL $tmp,EDX,$src 2826 emit_opcode( cbuf, 0x6B ); 2827 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2828 emit_d8( cbuf, (int)$src$$constant ); 2829 // MOV EDX,$src 2830 emit_opcode(cbuf, 0xB8 + EDX_enc); 2831 emit_d32( cbuf, (int)$src$$constant ); 2832 // MUL EDX:EAX,EDX 2833 emit_opcode( cbuf, 0xF7 ); 2834 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2835 // ADD EDX,ESI 2836 emit_opcode( cbuf, 0x03 ); 2837 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2838 %} 2839 2840 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2841 // PUSH src1.hi 2842 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2843 // PUSH src1.lo 2844 emit_opcode(cbuf, 0x50+$src1$$reg ); 2845 // PUSH src2.hi 2846 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2847 // PUSH src2.lo 2848 emit_opcode(cbuf, 0x50+$src2$$reg ); 2849 // CALL directly to the runtime 2850 cbuf.set_insts_mark(); 2851 emit_opcode(cbuf,0xE8); // Call into runtime 2852 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2853 // Restore stack 2854 emit_opcode(cbuf, 0x83); // add SP, #framesize 2855 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2856 emit_d8(cbuf, 4*4); 2857 %} 2858 2859 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2860 // PUSH src1.hi 2861 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2862 // PUSH src1.lo 2863 emit_opcode(cbuf, 0x50+$src1$$reg ); 2864 // PUSH src2.hi 2865 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2866 // PUSH src2.lo 2867 emit_opcode(cbuf, 0x50+$src2$$reg ); 2868 // CALL directly to the runtime 2869 cbuf.set_insts_mark(); 2870 emit_opcode(cbuf,0xE8); // Call into runtime 2871 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2872 // Restore stack 2873 emit_opcode(cbuf, 0x83); // add SP, #framesize 2874 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2875 emit_d8(cbuf, 4*4); 2876 %} 2877 2878 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2879 // MOV $tmp,$src.lo 2880 emit_opcode(cbuf, 0x8B); 2881 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2882 // OR $tmp,$src.hi 2883 emit_opcode(cbuf, 0x0B); 2884 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2885 %} 2886 2887 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2888 // CMP $src1.lo,$src2.lo 2889 emit_opcode( cbuf, 0x3B ); 2890 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2891 // JNE,s skip 2892 emit_cc(cbuf, 0x70, 0x5); 2893 emit_d8(cbuf,2); 2894 // CMP $src1.hi,$src2.hi 2895 emit_opcode( cbuf, 0x3B ); 2896 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2897 %} 2898 2899 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2900 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2901 emit_opcode( cbuf, 0x3B ); 2902 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2903 // MOV $tmp,$src1.hi 2904 emit_opcode( cbuf, 0x8B ); 2905 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2906 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2907 emit_opcode( cbuf, 0x1B ); 2908 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2909 %} 2910 2911 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2912 // XOR $tmp,$tmp 2913 emit_opcode(cbuf,0x33); // XOR 2914 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2915 // CMP $tmp,$src.lo 2916 emit_opcode( cbuf, 0x3B ); 2917 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2918 // SBB $tmp,$src.hi 2919 emit_opcode( cbuf, 0x1B ); 2920 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2921 %} 2922 2923 // Sniff, sniff... 
smells like Gnu Superoptimizer 2924 enc_class neg_long( eRegL dst ) %{ 2925 emit_opcode(cbuf,0xF7); // NEG hi 2926 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2927 emit_opcode(cbuf,0xF7); // NEG lo 2928 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2929 emit_opcode(cbuf,0x83); // SBB hi,0 2930 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2931 emit_d8 (cbuf,0 ); 2932 %} 2933 2934 enc_class enc_pop_rdx() %{ 2935 emit_opcode(cbuf,0x5A); 2936 %} 2937 2938 enc_class enc_rethrow() %{ 2939 cbuf.set_insts_mark(); 2940 emit_opcode(cbuf, 0xE9); // jmp entry 2941 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2942 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2943 %} 2944 2945 2946 // Convert a double to an int. Java semantics require we do complex 2947 // manglelations in the corner cases. So we set the rounding mode to 2948 // 'zero', store the darned double down as an int, and reset the 2949 // rounding mode to 'nearest'. The hardware throws an exception which 2950 // patches up the correct value directly to the stack. 2951 enc_class DPR2I_encoding( regDPR src ) %{ 2952 // Flip to round-to-zero mode. We attempted to allow invalid-op 2953 // exceptions here, so that a NAN or other corner-case value will 2954 // thrown an exception (but normal values get converted at full speed). 2955 // However, I2C adapters and other float-stack manglers leave pending 2956 // invalid-op exceptions hanging. We would have to clear them before 2957 // enabling them and that is more expensive than just testing for the 2958 // invalid value Intel stores down in the corner cases. 2959 emit_opcode(cbuf,0xD9); // FLDCW trunc 2960 emit_opcode(cbuf,0x2D); 2961 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 2962 // Allocate a word 2963 emit_opcode(cbuf,0x83); // SUB ESP,4 2964 emit_opcode(cbuf,0xEC); 2965 emit_d8(cbuf,0x04); 2966 // Encoding assumes a double has been pushed into FPR0. 
2967 // Store down the double as an int, popping the FPU stack 2968 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2969 emit_opcode(cbuf,0x1C); 2970 emit_d8(cbuf,0x24); 2971 // Restore the rounding mode; mask the exception 2972 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2973 emit_opcode(cbuf,0x2D); 2974 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2975 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 2976 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 2977 2978 // Load the converted int; adjust CPU stack 2979 emit_opcode(cbuf,0x58); // POP EAX 2980 emit_opcode(cbuf,0x3D); // CMP EAX,imm 2981 emit_d32 (cbuf,0x80000000); // 0x80000000 2982 emit_opcode(cbuf,0x75); // JNE around_slow_call 2983 emit_d8 (cbuf,0x07); // Size of slow_call 2984 // Push src onto stack slow-path 2985 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2986 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2987 // CALL directly to the runtime 2988 cbuf.set_insts_mark(); 2989 emit_opcode(cbuf,0xE8); // Call into runtime 2990 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2991 // Carry on here... 2992 %} 2993 2994 enc_class DPR2L_encoding( regDPR src ) %{ 2995 emit_opcode(cbuf,0xD9); // FLDCW trunc 2996 emit_opcode(cbuf,0x2D); 2997 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 2998 // Allocate a word 2999 emit_opcode(cbuf,0x83); // SUB ESP,8 3000 emit_opcode(cbuf,0xEC); 3001 emit_d8(cbuf,0x08); 3002 // Encoding assumes a double has been pushed into FPR0. 3003 // Store down the double as a long, popping the FPU stack 3004 emit_opcode(cbuf,0xDF); // FISTP [ESP] 3005 emit_opcode(cbuf,0x3C); 3006 emit_d8(cbuf,0x24); 3007 // Restore the rounding mode; mask the exception 3008 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3009 emit_opcode(cbuf,0x2D); 3010 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3011 ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 3012 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3013 3014 // Load the converted int; adjust CPU stack 3015 emit_opcode(cbuf,0x58); // POP EAX 3016 emit_opcode(cbuf,0x5A); // POP EDX 3017 emit_opcode(cbuf,0x81); // CMP EDX,imm 3018 emit_d8 (cbuf,0xFA); // rdx 3019 emit_d32 (cbuf,0x80000000); // 0x80000000 3020 emit_opcode(cbuf,0x75); // JNE around_slow_call 3021 emit_d8 (cbuf,0x07+4); // Size of slow_call 3022 emit_opcode(cbuf,0x85); // TEST EAX,EAX 3023 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 3024 emit_opcode(cbuf,0x75); // JNE around_slow_call 3025 emit_d8 (cbuf,0x07); // Size of slow_call 3026 // Push src onto stack slow-path 3027 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3028 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3029 // CALL directly to the runtime 3030 cbuf.set_insts_mark(); 3031 emit_opcode(cbuf,0xE8); // Call into runtime 3032 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3033 // Carry on here... 
3034 %} 3035 3036 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 3037 // Operand was loaded from memory into fp ST (stack top) 3038 // FMUL ST,$src /* D8 C8+i */ 3039 emit_opcode(cbuf, 0xD8); 3040 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3041 %} 3042 3043 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3044 // FADDP ST,src2 /* D8 C0+i */ 3045 emit_opcode(cbuf, 0xD8); 3046 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3047 //could use FADDP src2,fpST /* DE C0+i */ 3048 %} 3049 3050 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3051 // FADDP src2,ST /* DE C0+i */ 3052 emit_opcode(cbuf, 0xDE); 3053 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3054 %} 3055 3056 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3057 // Operand has been loaded into fp ST (stack top) 3058 // FSUB ST,$src1 3059 emit_opcode(cbuf, 0xD8); 3060 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3061 3062 // FDIV 3063 emit_opcode(cbuf, 0xD8); 3064 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3065 %} 3066 3067 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3068 // Operand was loaded from memory into fp ST (stack top) 3069 // FADD ST,$src /* D8 C0+i */ 3070 emit_opcode(cbuf, 0xD8); 3071 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3072 3073 // FMUL ST,src2 /* D8 C*+i */ 3074 emit_opcode(cbuf, 0xD8); 3075 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3076 %} 3077 3078 3079 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3080 // Operand was loaded from memory into fp ST (stack top) 3081 // FADD ST,$src /* D8 C0+i */ 3082 emit_opcode(cbuf, 0xD8); 3083 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3084 3085 // FMULP src2,ST /* DE C8+i */ 3086 emit_opcode(cbuf, 0xDE); 3087 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3088 %} 3089 3090 // Atomically load the volatile long 3091 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3092 emit_opcode(cbuf,0xDF); 3093 int rm_byte_opcode = 0x05; 3094 int base = $mem$$base; 3095 int index = $mem$$index; 3096 int scale = $mem$$scale; 3097 int displace = $mem$$disp; 3098 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3099 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3100 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3101 %} 3102 3103 // Volatile Store Long. Must be atomic, so move it into 3104 // the FP TOS and then do a 64-bit FIST. Has to probe the 3105 // target address before the store (for null-ptr checks) 3106 // so the memory operand is used twice in the encoding. 3107 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3108 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3109 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3110 emit_opcode(cbuf,0xDF); 3111 int rm_byte_opcode = 0x07; 3112 int base = $mem$$base; 3113 int index = $mem$$index; 3114 int scale = $mem$$scale; 3115 int displace = $mem$$disp; 3116 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3117 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3118 %} 3119 3120 // Safepoint Poll. This polls the safepoint page, and causes an 3121 // exception if it is not readable. Unfortunately, it kills the condition code 3122 // in the process 3123 // We current use TESTL [spp],EDI 3124 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 3125 3126 enc_class Safepoint_Poll() %{ 3127 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); 3128 emit_opcode(cbuf,0x85); 3129 emit_rm (cbuf, 0x0, 0x7, 0x5); 3130 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 3131 %} 3132 %} 3133 3134 3135 //----------FRAME-------------------------------------------------------------- 3136 // Definition of frame structure and management information. 
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   | (to get allocators register number
//  G  Owned by |        |           v    add OptoReg::stack0())
//  r   CALLER  |        |
//  o     |     +--------+      pad to even-align allocators stack-slot
//  w     V     |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^     |   in   |  5
//        |     |  args  |  4   Holes in incoming args owned by SELF
//  |     |     |        |  3
//  |     |     +--------+
//  V     |     | old out|      Empty on Intel, window on Sparc
//        |  old|preserve|      Must be even aligned.
//        |  SP-+--------+----> Matcher::_old_SP, even aligned
//        |     |   in   |  3   area for Intel ret address
//     Owned by |preserve|      Empty on Sparc.
//       SELF   +--------+
//        |     |  pad2  |  2   pad to align old SP
//        |     +--------+  1
//        |     |  locks |  0
//        |     +--------+----> OptoReg::stack0(), even aligned
//        |     |  pad1  | 11   pad to align new SP
//        |     +--------+
//        |     |        | 10
//        |     | spills |  9   spills
//        V     |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^     |  out   |  7
//        |     |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by   +--------+
//     CALLEE   | new out|  6   Empty on Intel, window on Sparc
//        |  new|preserve|      Must be even-aligned.
//        |  SP-+--------+----> Matcher::_new_SP, even aligned
//        |     |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // NOTE(review): float uses UseSSE>=1 here but UseSSE>=2 in
  // c_return_value above — presumably intentional (Java vs. C ABI);
  // confirm before unifying.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value fits in a sign-extended 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand
eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}
// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}

operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}

operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" with out a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o");     // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are of variable size
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
4621 4622 // Integer ALU reg operation 4623 pipe_class ialu_reg(rRegI dst) %{ 4624 single_instruction; 4625 dst : S4(write); 4626 dst : S3(read); 4627 DECODE : S0; // any decoder 4628 ALU : S3; // any alu 4629 %} 4630 4631 // Long ALU reg operation 4632 pipe_class ialu_reg_long(eRegL dst) %{ 4633 instruction_count(2); 4634 dst : S4(write); 4635 dst : S3(read); 4636 DECODE : S0(2); // any 2 decoders 4637 ALU : S3(2); // both alus 4638 %} 4639 4640 // Integer ALU reg operation using big decoder 4641 pipe_class ialu_reg_fat(rRegI dst) %{ 4642 single_instruction; 4643 dst : S4(write); 4644 dst : S3(read); 4645 D0 : S0; // big decoder only 4646 ALU : S3; // any alu 4647 %} 4648 4649 // Long ALU reg operation using big decoder 4650 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4651 instruction_count(2); 4652 dst : S4(write); 4653 dst : S3(read); 4654 D0 : S0(2); // big decoder only; twice 4655 ALU : S3(2); // any 2 alus 4656 %} 4657 4658 // Integer ALU reg-reg operation 4659 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4660 single_instruction; 4661 dst : S4(write); 4662 src : S3(read); 4663 DECODE : S0; // any decoder 4664 ALU : S3; // any alu 4665 %} 4666 4667 // Long ALU reg-reg operation 4668 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4669 instruction_count(2); 4670 dst : S4(write); 4671 src : S3(read); 4672 DECODE : S0(2); // any 2 decoders 4673 ALU : S3(2); // both alus 4674 %} 4675 4676 // Integer ALU reg-reg operation 4677 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4678 single_instruction; 4679 dst : S4(write); 4680 src : S3(read); 4681 D0 : S0; // big decoder only 4682 ALU : S3; // any alu 4683 %} 4684 4685 // Long ALU reg-reg operation 4686 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4687 instruction_count(2); 4688 dst : S4(write); 4689 src : S3(read); 4690 D0 : S0(2); // big decoder only; twice 4691 ALU : S3(2); // both alus 4692 %} 4693 4694 // Integer ALU reg-mem operation 4695 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4696 single_instruction; 4697 dst : S5(write); 4698 mem : S3(read); 4699 D0 : S0; // big decoder only 4700 ALU : S4; // any alu 4701 MEM : S3; // any mem 4702 %} 4703 4704 // Long ALU reg-mem operation 4705 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4706 instruction_count(2); 4707 dst : S5(write); 4708 mem : S3(read); 4709 D0 : S0(2); // big decoder only; twice 4710 ALU : S4(2); // any 2 alus 4711 MEM : S3(2); // both mems 4712 %} 4713 4714 // Integer mem operation (prefetch) 4715 pipe_class ialu_mem(memory mem) 4716 %{ 4717 single_instruction; 4718 mem : S3(read); 4719 D0 : S0; // big decoder only 4720 MEM : S3; // any mem 4721 %} 4722 4723 // Integer Store to Memory 4724 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4725 single_instruction; 4726 mem : S3(read); 4727 src : S5(read); 4728 D0 : S0; // big decoder only 4729 ALU : S4; // any alu 4730 MEM : S3; 4731 %} 4732 4733 // Long Store to Memory 4734 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4735 instruction_count(2); 4736 mem : S3(read); 4737 src : S5(read); 4738 D0 : S0(2); // big decoder only; twice 4739 ALU : S4(2); // any 2 alus 4740 MEM : S3(2); // Both mems 4741 %} 4742 4743 // Integer Store to Memory 4744 pipe_class ialu_mem_imm(memory mem) %{ 4745 single_instruction; 4746 mem : S3(read); 4747 D0 : S0; // big decoder only 4748 ALU : S4; // any alu 4749 MEM : S3; 4750 %} 4751 4752 // Integer ALU0 reg-reg operation 4753 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4754 single_instruction; 4755 dst : S4(write); 4756 src : S3(read); 4757 D0 : S0; // Big decoder only 4758 ALU0 : S3; // only alu0 4759 %} 4760 4761 // Integer ALU0 reg-mem operation 4762 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4763 single_instruction; 4764 dst : S5(write); 4765 mem : S3(read); 4766 D0 : S0; // big decoder only 4767 ALU0 : S4; // ALU0 only 4768 MEM : S3; // any mem 4769 %} 4770 4771 // Integer ALU reg-reg operation 4772 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4773 single_instruction; 4774 cr : S4(write); 4775 src1 : S3(read); 4776 src2 : S3(read); 4777 DECODE : S0; // any decoder 4778 ALU : S3; // any alu 4779 %} 4780 4781 // Integer ALU reg-imm operation 4782 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4783 single_instruction; 4784 cr : S4(write); 4785 src1 : S3(read); 4786 DECODE : S0; // any decoder 4787 ALU : S3; // any alu 4788 %} 4789 4790 // Integer ALU reg-mem operation 4791 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4792 single_instruction; 4793 cr : S4(write); 4794 src1 : S3(read); 4795 src2 : S3(read); 4796 D0 : S0; // big decoder only 4797 ALU : S4; // any alu 4798 MEM : S3; 4799 %} 4800 4801 // Conditional move reg-reg 4802 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4803 instruction_count(4); 4804 y : S4(read); 4805 q : S3(read); 4806 p : S3(read); 4807 DECODE : S0(4); // any decoder 4808 %} 4809 4810 // Conditional move reg-reg 4811 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4812 single_instruction; 4813 dst : S4(write); 4814 src : S3(read); 4815 cr : S3(read); 4816 DECODE : S0; // any decoder 4817 %} 4818 4819 // Conditional move reg-mem 4820 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4821 single_instruction; 4822 dst : S4(write); 4823 src : S3(read); 4824 cr : S3(read); 4825 DECODE : S0; // any decoder 4826 MEM : S3; 4827 %} 4828 4829 // Conditional move reg-reg long 4830 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4831 single_instruction; 4832 dst : S4(write); 4833 src : S3(read); 4834 cr : S3(read); 4835 DECODE : S0(2); // any 2 decoders 4836 %} 4837 4838 // Conditional move double reg-reg 4839 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4840 single_instruction; 4841 dst : S4(write); 4842 src : S3(read); 4843 cr : S3(read); 4844 DECODE : S0; // any decoder 4845 %} 4846 4847 // Float reg-reg operation 4848 pipe_class fpu_reg(regDPR 
dst) %{ 4849 instruction_count(2); 4850 dst : S3(read); 4851 DECODE : S0(2); // any 2 decoders 4852 FPU : S3; 4853 %} 4854 4855 // Float reg-reg operation 4856 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4857 instruction_count(2); 4858 dst : S4(write); 4859 src : S3(read); 4860 DECODE : S0(2); // any 2 decoders 4861 FPU : S3; 4862 %} 4863 4864 // Float reg-reg operation 4865 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4866 instruction_count(3); 4867 dst : S4(write); 4868 src1 : S3(read); 4869 src2 : S3(read); 4870 DECODE : S0(3); // any 3 decoders 4871 FPU : S3(2); 4872 %} 4873 4874 // Float reg-reg operation 4875 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 4876 instruction_count(4); 4877 dst : S4(write); 4878 src1 : S3(read); 4879 src2 : S3(read); 4880 src3 : S3(read); 4881 DECODE : S0(4); // any 4 decoders 4882 FPU : S3(2); 4883 %} 4884 4885 // Float reg-mem-reg-reg operation 4886 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 4887 instruction_count(4); 4888 dst : S4(write); 4889 src1 : S3(read); 4890 src2 : S3(read); 4891 src3 : S3(read); 4892 DECODE : S1(3); // any 3 decoders 4893 D0 : S0; // big decoder only 4894 FPU : S3(2); 4895 MEM : S3; 4896 %} 4897 4898 // Float reg-mem operation 4899 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 4900 instruction_count(2); 4901 dst : S5(write); 4902 mem : S3(read); 4903 D0 : S0; // big decoder only 4904 DECODE : S1; // any decoder for FPU POP 4905 FPU : S4; 4906 MEM : S3; // any mem 4907 %} 4908 4909 // Float reg-mem operation 4910 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 4911 instruction_count(3); 4912 dst : S5(write); 4913 src1 : S3(read); 4914 mem : S3(read); 4915 D0 : S0; // big decoder only 4916 DECODE : S1(2); // any decoder for FPU POP 4917 FPU : S4; 4918 MEM : S3; // any mem 4919 %} 4920 4921 // Float mem-reg operation 4922 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 4923
instruction_count(2); 4924 src : S5(read); 4925 mem : S3(read); 4926 DECODE : S0; // any decoder for FPU PUSH 4927 D0 : S1; // big decoder only 4928 FPU : S4; 4929 MEM : S3; // any mem 4930 %} 4931 4932 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4933 instruction_count(3); 4934 src1 : S3(read); 4935 src2 : S3(read); 4936 mem : S3(read); 4937 DECODE : S0(2); // any decoder for FPU PUSH 4938 D0 : S1; // big decoder only 4939 FPU : S4; 4940 MEM : S3; // any mem 4941 %} 4942 4943 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4944 instruction_count(3); 4945 src1 : S3(read); 4946 src2 : S3(read); 4947 mem : S4(read); 4948 DECODE : S0; // any decoder for FPU PUSH 4949 D0 : S0(2); // big decoder only 4950 FPU : S4; 4951 MEM : S3(2); // any mem 4952 %} 4953 4954 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4955 instruction_count(2); 4956 src1 : S3(read); 4957 dst : S4(read); 4958 D0 : S0(2); // big decoder only 4959 MEM : S3(2); // any mem 4960 %} 4961 4962 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4963 instruction_count(3); 4964 src1 : S3(read); 4965 src2 : S3(read); 4966 dst : S4(read); 4967 D0 : S0(3); // big decoder only 4968 FPU : S4; 4969 MEM : S3(3); // any mem 4970 %} 4971 4972 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4973 instruction_count(3); 4974 src1 : S4(read); 4975 mem : S4(read); 4976 DECODE : S0; // any decoder for FPU PUSH 4977 D0 : S0(2); // big decoder only 4978 FPU : S4; 4979 MEM : S3(2); // any mem 4980 %} 4981 4982 // Float load constant 4983 pipe_class fpu_reg_con(regDPR dst) %{ 4984 instruction_count(2); 4985 dst : S5(write); 4986 D0 : S0; // big decoder only for the load 4987 DECODE : S1; // any decoder for FPU POP 4988 FPU : S4; 4989 MEM : S3; // any mem 4990 %} 4991 4992 // Float load constant 4993 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 4994 instruction_count(3); 4995 dst : S5(write); 4996 src : S3(read); 4997 D0 : S0; // big decoder only for 
the load 4998 DECODE : S1(2); // any decoder for FPU POP 4999 FPU : S4; 5000 MEM : S3; // any mem 5001 %} 5002 5003 // UnConditional branch 5004 pipe_class pipe_jmp( label labl ) %{ 5005 single_instruction; 5006 BR : S3; 5007 %} 5008 5009 // Conditional branch 5010 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5011 single_instruction; 5012 cr : S1(read); 5013 BR : S3; 5014 %} 5015 5016 // Allocation idiom 5017 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5018 instruction_count(1); force_serialization; 5019 fixed_latency(6); 5020 heap_ptr : S3(read); 5021 DECODE : S0(3); 5022 D0 : S2; 5023 MEM : S3; 5024 ALU : S3(2); 5025 dst : S5(write); 5026 BR : S5; 5027 %} 5028 5029 // Generic big/slow expanded idiom 5030 pipe_class pipe_slow( ) %{ 5031 instruction_count(10); multiple_bundles; force_serialization; 5032 fixed_latency(100); 5033 D0 : S0(2); 5034 MEM : S3(2); 5035 %} 5036 5037 // The real do-nothing guy 5038 pipe_class empty( ) %{ 5039 instruction_count(0); 5040 %} 5041 5042 // Define the class for the Nop node 5043 define %{ 5044 MachNop = empty; 5045 %} 5046 5047 %} 5048 5049 //----------INSTRUCTIONS------------------------------------------------------- 5050 // 5051 // match -- States which machine-independent subtree may be replaced 5052 // by this instruction. 5053 // ins_cost -- The estimated cost of this instruction is used by instruction 5054 // selection to identify a minimum cost tree of machine 5055 // instructions that matches a tree of machine-independent 5056 // instructions. 5057 // format -- A string providing the disassembly for this instruction. 5058 // The value of an instruction's operand may be inserted 5059 // by referring to it with a '$' prefix. 5060 // opcode -- Three instruction opcodes may be provided. These are referred 5061 // to within an encode class as $primary, $secondary, and $tertiary 5062 // respectively. 
The primary opcode is commonly used to 5063 // indicate the type of machine instruction, while secondary 5064 // and tertiary are often used for prefix options or addressing 5065 // modes. 5066 // ins_encode -- A list of encode classes with parameters. The encode class 5067 // name must have been defined in an 'enc_class' specification 5068 // in the encode section of the architecture description. 5069 5070 //----------BSWAP-Instruction-------------------------------------------------- 5071 instruct bytes_reverse_int(rRegI dst) %{ 5072 match(Set dst (ReverseBytesI dst)); 5073 5074 format %{ "BSWAP $dst" %} 5075 opcode(0x0F, 0xC8); 5076 ins_encode( OpcP, OpcSReg(dst) ); 5077 ins_pipe( ialu_reg ); 5078 %} 5079 5080 instruct bytes_reverse_long(eRegL dst) %{ 5081 match(Set dst (ReverseBytesL dst)); 5082 5083 format %{ "BSWAP $dst.lo\n\t" 5084 "BSWAP $dst.hi\n\t" 5085 "XCHG $dst.lo $dst.hi" %} 5086 5087 ins_cost(125); 5088 ins_encode( bswap_long_bytes(dst) ); 5089 ins_pipe( ialu_reg_reg); 5090 %} 5091 5092 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5093 match(Set dst (ReverseBytesUS dst)); 5094 effect(KILL cr); 5095 5096 format %{ "BSWAP $dst\n\t" 5097 "SHR $dst,16\n\t" %} 5098 ins_encode %{ 5099 __ bswapl($dst$$Register); 5100 __ shrl($dst$$Register, 16); 5101 %} 5102 ins_pipe( ialu_reg ); 5103 %} 5104 5105 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5106 match(Set dst (ReverseBytesS dst)); 5107 effect(KILL cr); 5108 5109 format %{ "BSWAP $dst\n\t" 5110 "SAR $dst,16\n\t" %} 5111 ins_encode %{ 5112 __ bswapl($dst$$Register); 5113 __ sarl($dst$$Register, 16); 5114 %} 5115 ins_pipe( ialu_reg ); 5116 %} 5117 5118 5119 //---------- Zeros Count Instructions ------------------------------------------ 5120 5121 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5122 predicate(UseCountLeadingZerosInstruction); 5123 match(Set dst (CountLeadingZerosI src)); 5124 effect(KILL cr); 5125 5126 format %{ "LZCNT $dst, $src\t# count 
leading zeros (int)" %} 5127 ins_encode %{ 5128 __ lzcntl($dst$$Register, $src$$Register); 5129 %} 5130 ins_pipe(ialu_reg); 5131 %} 5132 5133 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5134 predicate(!UseCountLeadingZerosInstruction); 5135 match(Set dst (CountLeadingZerosI src)); 5136 effect(KILL cr); 5137 5138 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5139 "JNZ skip\n\t" 5140 "MOV $dst, -1\n" 5141 "skip:\n\t" 5142 "NEG $dst\n\t" 5143 "ADD $dst, 31" %} 5144 ins_encode %{ 5145 Register Rdst = $dst$$Register; 5146 Register Rsrc = $src$$Register; 5147 Label skip; 5148 __ bsrl(Rdst, Rsrc); 5149 __ jccb(Assembler::notZero, skip); 5150 __ movl(Rdst, -1); 5151 __ bind(skip); 5152 __ negl(Rdst); 5153 __ addl(Rdst, BitsPerInt - 1); 5154 %} 5155 ins_pipe(ialu_reg); 5156 %} 5157 5158 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5159 predicate(UseCountLeadingZerosInstruction); 5160 match(Set dst (CountLeadingZerosL src)); 5161 effect(TEMP dst, KILL cr); 5162 5163 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5164 "JNC done\n\t" 5165 "LZCNT $dst, $src.lo\n\t" 5166 "ADD $dst, 32\n" 5167 "done:" %} 5168 ins_encode %{ 5169 Register Rdst = $dst$$Register; 5170 Register Rsrc = $src$$Register; 5171 Label done; 5172 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5173 __ jccb(Assembler::carryClear, done); 5174 __ lzcntl(Rdst, Rsrc); 5175 __ addl(Rdst, BitsPerInt); 5176 __ bind(done); 5177 %} 5178 ins_pipe(ialu_reg); 5179 %} 5180 5181 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5182 predicate(!UseCountLeadingZerosInstruction); 5183 match(Set dst (CountLeadingZerosL src)); 5184 effect(TEMP dst, KILL cr); 5185 5186 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5187 "JZ msw_is_zero\n\t" 5188 "ADD $dst, 32\n\t" 5189 "JMP not_zero\n" 5190 "msw_is_zero:\n\t" 5191 "BSR $dst, $src.lo\n\t" 5192 "JNZ not_zero\n\t" 5193 "MOV $dst, -1\n" 5194 "not_zero:\n\t" 5195 "NEG 
$dst\n\t" 5196 "ADD $dst, 63\n" %} 5197 ins_encode %{ 5198 Register Rdst = $dst$$Register; 5199 Register Rsrc = $src$$Register; 5200 Label msw_is_zero; 5201 Label not_zero; 5202 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5203 __ jccb(Assembler::zero, msw_is_zero); 5204 __ addl(Rdst, BitsPerInt); 5205 __ jmpb(not_zero); 5206 __ bind(msw_is_zero); 5207 __ bsrl(Rdst, Rsrc); 5208 __ jccb(Assembler::notZero, not_zero); 5209 __ movl(Rdst, -1); 5210 __ bind(not_zero); 5211 __ negl(Rdst); 5212 __ addl(Rdst, BitsPerLong - 1); 5213 %} 5214 ins_pipe(ialu_reg); 5215 %} 5216 5217 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5218 predicate(UseCountTrailingZerosInstruction); 5219 match(Set dst (CountTrailingZerosI src)); 5220 effect(KILL cr); 5221 5222 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5223 ins_encode %{ 5224 __ tzcntl($dst$$Register, $src$$Register); 5225 %} 5226 ins_pipe(ialu_reg); 5227 %} 5228 5229 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5230 predicate(!UseCountTrailingZerosInstruction); 5231 match(Set dst (CountTrailingZerosI src)); 5232 effect(KILL cr); 5233 5234 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5235 "JNZ done\n\t" 5236 "MOV $dst, 32\n" 5237 "done:" %} 5238 ins_encode %{ 5239 Register Rdst = $dst$$Register; 5240 Label done; 5241 __ bsfl(Rdst, $src$$Register); 5242 __ jccb(Assembler::notZero, done); 5243 __ movl(Rdst, BitsPerInt); 5244 __ bind(done); 5245 %} 5246 ins_pipe(ialu_reg); 5247 %} 5248 5249 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5250 predicate(UseCountTrailingZerosInstruction); 5251 match(Set dst (CountTrailingZerosL src)); 5252 effect(TEMP dst, KILL cr); 5253 5254 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5255 "JNC done\n\t" 5256 "TZCNT $dst, $src.hi\n\t" 5257 "ADD $dst, 32\n" 5258 "done:" %} 5259 ins_encode %{ 5260 Register Rdst = $dst$$Register; 5261 Register Rsrc = $src$$Register; 5262 Label done; 5263 __ 
tzcntl(Rdst, Rsrc); 5264 __ jccb(Assembler::carryClear, done); 5265 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5266 __ addl(Rdst, BitsPerInt); 5267 __ bind(done); 5268 %} 5269 ins_pipe(ialu_reg); 5270 %} 5271 5272 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5273 predicate(!UseCountTrailingZerosInstruction); 5274 match(Set dst (CountTrailingZerosL src)); 5275 effect(TEMP dst, KILL cr); 5276 5277 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5278 "JNZ done\n\t" 5279 "BSF $dst, $src.hi\n\t" 5280 "JNZ msw_not_zero\n\t" 5281 "MOV $dst, 32\n" 5282 "msw_not_zero:\n\t" 5283 "ADD $dst, 32\n" 5284 "done:" %} 5285 ins_encode %{ 5286 Register Rdst = $dst$$Register; 5287 Register Rsrc = $src$$Register; 5288 Label msw_not_zero; 5289 Label done; 5290 __ bsfl(Rdst, Rsrc); 5291 __ jccb(Assembler::notZero, done); 5292 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5293 __ jccb(Assembler::notZero, msw_not_zero); 5294 __ movl(Rdst, BitsPerInt); 5295 __ bind(msw_not_zero); 5296 __ addl(Rdst, BitsPerInt); 5297 __ bind(done); 5298 %} 5299 ins_pipe(ialu_reg); 5300 %} 5301 5302 5303 //---------- Population Count Instructions ------------------------------------- 5304 5305 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5306 predicate(UsePopCountInstruction); 5307 match(Set dst (PopCountI src)); 5308 effect(KILL cr); 5309 5310 format %{ "POPCNT $dst, $src" %} 5311 ins_encode %{ 5312 __ popcntl($dst$$Register, $src$$Register); 5313 %} 5314 ins_pipe(ialu_reg); 5315 %} 5316 5317 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5318 predicate(UsePopCountInstruction); 5319 match(Set dst (PopCountI (LoadI mem))); 5320 effect(KILL cr); 5321 5322 format %{ "POPCNT $dst, $mem" %} 5323 ins_encode %{ 5324 __ popcntl($dst$$Register, $mem$$Address); 5325 %} 5326 ins_pipe(ialu_reg); 5327 %} 5328 5329 // Note: Long.bitCount(long) returns an int. 
5330 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 5331 predicate(UsePopCountInstruction); 5332 match(Set dst (PopCountL src)); 5333 effect(KILL cr, TEMP tmp, TEMP dst); 5334 5335 format %{ "POPCNT $dst, $src.lo\n\t" 5336 "POPCNT $tmp, $src.hi\n\t" 5337 "ADD $dst, $tmp" %} 5338 ins_encode %{ 5339 __ popcntl($dst$$Register, $src$$Register); 5340 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 5341 __ addl($dst$$Register, $tmp$$Register); 5342 %} 5343 ins_pipe(ialu_reg); 5344 %} 5345 5346 // Note: Long.bitCount(long) returns an int. 5347 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ 5348 predicate(UsePopCountInstruction); 5349 match(Set dst (PopCountL (LoadL mem))); 5350 effect(KILL cr, TEMP tmp, TEMP dst); 5351 5352 format %{ "POPCNT $dst, $mem\n\t" 5353 "POPCNT $tmp, $mem+4\n\t" 5354 "ADD $dst, $tmp" %} 5355 ins_encode %{ 5356 //__ popcntl($dst$$Register, $mem$$Address$$first); 5357 //__ popcntl($tmp$$Register, $mem$$Address$$second); 5358 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); 5359 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); 5360 __ addl($dst$$Register, $tmp$$Register); 5361 %} 5362 ins_pipe(ialu_reg); 5363 %} 5364 5365 5366 //----------Load/Store/Move Instructions--------------------------------------- 5367 //----------Load Instructions-------------------------------------------------- 5368 // Load Byte (8bit signed) 5369 instruct loadB(xRegI dst, memory mem) %{ 5370 match(Set dst (LoadB mem)); 5371 5372 ins_cost(125); 5373 format %{ "MOVSX8 $dst,$mem\t# byte" %} 5374 5375 ins_encode %{ 5376 __ movsbl($dst$$Register, $mem$$Address); 5377 %} 5378 5379 ins_pipe(ialu_reg_mem); 5380 %} 5381 5382 // Load Byte (8bit signed) into Long Register 5383 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5384 match(Set dst (ConvI2L (LoadB mem))); 5385 effect(KILL 
cr); 5386 5387 ins_cost(375); 5388 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5389 "MOV $dst.hi,$dst.lo\n\t" 5390 "SAR $dst.hi,7" %} 5391 5392 ins_encode %{ 5393 __ movsbl($dst$$Register, $mem$$Address); 5394 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5395 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5396 %} 5397 5398 ins_pipe(ialu_reg_mem); 5399 %} 5400 5401 // Load Unsigned Byte (8bit UNsigned) 5402 instruct loadUB(xRegI dst, memory mem) %{ 5403 match(Set dst (LoadUB mem)); 5404 5405 ins_cost(125); 5406 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5407 5408 ins_encode %{ 5409 __ movzbl($dst$$Register, $mem$$Address); 5410 %} 5411 5412 ins_pipe(ialu_reg_mem); 5413 %} 5414 5415 // Load Unsigned Byte (8 bit UNsigned) into Long Register 5416 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5417 match(Set dst (ConvI2L (LoadUB mem))); 5418 effect(KILL cr); 5419 5420 ins_cost(250); 5421 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5422 "XOR $dst.hi,$dst.hi" %} 5423 5424 ins_encode %{ 5425 Register Rdst = $dst$$Register; 5426 __ movzbl(Rdst, $mem$$Address); 5427 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5428 %} 5429 5430 ins_pipe(ialu_reg_mem); 5431 %} 5432 5433 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5434 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5435 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5436 effect(KILL cr); 5437 5438 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5439 "XOR $dst.hi,$dst.hi\n\t" 5440 "AND $dst.lo,right_n_bits($mask, 8)" %} 5441 ins_encode %{ 5442 Register Rdst = $dst$$Register; 5443 __ movzbl(Rdst, $mem$$Address); 5444 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5445 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5446 %} 5447 ins_pipe(ialu_reg_mem); 5448 %} 5449 5450 // Load Short (16bit signed) 5451 instruct loadS(rRegI 
dst, memory mem) %{ 5452 match(Set dst (LoadS mem)); 5453 5454 ins_cost(125); 5455 format %{ "MOVSX $dst,$mem\t# short" %} 5456 5457 ins_encode %{ 5458 __ movswl($dst$$Register, $mem$$Address); 5459 %} 5460 5461 ins_pipe(ialu_reg_mem); 5462 %} 5463 5464 // Load Short (16 bit signed) to Byte (8 bit signed) 5465 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5466 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5467 5468 ins_cost(125); 5469 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5470 ins_encode %{ 5471 __ movsbl($dst$$Register, $mem$$Address); 5472 %} 5473 ins_pipe(ialu_reg_mem); 5474 %} 5475 5476 // Load Short (16bit signed) into Long Register 5477 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5478 match(Set dst (ConvI2L (LoadS mem))); 5479 effect(KILL cr); 5480 5481 ins_cost(375); 5482 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5483 "MOV $dst.hi,$dst.lo\n\t" 5484 "SAR $dst.hi,15" %} 5485 5486 ins_encode %{ 5487 __ movswl($dst$$Register, $mem$$Address); 5488 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5489 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5490 %} 5491 5492 ins_pipe(ialu_reg_mem); 5493 %} 5494 5495 // Load Unsigned Short/Char (16bit unsigned) 5496 instruct loadUS(rRegI dst, memory mem) %{ 5497 match(Set dst (LoadUS mem)); 5498 5499 ins_cost(125); 5500 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5501 5502 ins_encode %{ 5503 __ movzwl($dst$$Register, $mem$$Address); 5504 %} 5505 5506 ins_pipe(ialu_reg_mem); 5507 %} 5508 5509 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5510 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5511 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5512 5513 ins_cost(125); 5514 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5515 ins_encode %{ 5516 __ movsbl($dst$$Register, $mem$$Address); 5517 %} 5518 ins_pipe(ialu_reg_mem); 5519 %} 5520 5521 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5522 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5523 match(Set dst (ConvI2L (LoadUS mem))); 5524 effect(KILL cr); 5525 5526 ins_cost(250); 5527 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5528 "XOR $dst.hi,$dst.hi" %} 5529 5530 ins_encode %{ 5531 __ movzwl($dst$$Register, $mem$$Address); 5532 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5533 %} 5534 5535 ins_pipe(ialu_reg_mem); 5536 %} 5537 5538 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5539 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5540 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5541 effect(KILL cr); 5542 5543 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5544 "XOR $dst.hi,$dst.hi" %} 5545 ins_encode %{ 5546 Register Rdst = $dst$$Register; 5547 __ movzbl(Rdst, $mem$$Address); 5548 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5549 %} 5550 ins_pipe(ialu_reg_mem); 5551 %} 5552 5553 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5554 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5555 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5556 effect(KILL cr); 5557 5558 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5559 "XOR $dst.hi,$dst.hi\n\t" 5560 "AND $dst.lo,right_n_bits($mask, 16)" %} 5561 ins_encode %{ 5562 Register Rdst = $dst$$Register; 5563 __ movzwl(Rdst, $mem$$Address); 5564 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5565 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5566 %} 5567 ins_pipe(ialu_reg_mem); 5568 %} 5569 5570 // Load Integer 5571 instruct loadI(rRegI dst, memory mem) %{ 5572 match(Set dst (LoadI mem)); 5573 5574 ins_cost(125); 5575 format %{ "MOV $dst,$mem\t# int" %} 5576 5577 ins_encode %{ 5578 __ movl($dst$$Register, $mem$$Address); 5579 %} 5580 5581 ins_pipe(ialu_reg_mem); 5582 %} 5583 5584 // Load Integer (32 bit signed) to Byte (8 bit signed) 5585 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5586 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5587 5588 ins_cost(125); 5589 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5590 ins_encode %{ 5591 __ movsbl($dst$$Register, $mem$$Address); 5592 %} 5593 ins_pipe(ialu_reg_mem); 5594 %} 5595 5596 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5597 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5598 match(Set dst (AndI (LoadI mem) mask)); 5599 5600 ins_cost(125); 5601 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5602 ins_encode %{ 5603 __ movzbl($dst$$Register, $mem$$Address); 5604 %} 5605 ins_pipe(ialu_reg_mem); 5606 %} 5607 5608 // Load Integer (32 bit signed) to Short (16 bit signed) 5609 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5610 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5611 5612 ins_cost(125); 5613 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5614 ins_encode %{ 5615 __ movswl($dst$$Register, $mem$$Address); 5616 %} 5617 ins_pipe(ialu_reg_mem); 5618 
%} 5619 5620 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5621 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5622 match(Set dst (AndI (LoadI mem) mask)); 5623 5624 ins_cost(125); 5625 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5626 ins_encode %{ 5627 __ movzwl($dst$$Register, $mem$$Address); 5628 %} 5629 ins_pipe(ialu_reg_mem); 5630 %} 5631 5632 // Load Integer into Long Register 5633 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5634 match(Set dst (ConvI2L (LoadI mem))); 5635 effect(KILL cr); 5636 5637 ins_cost(375); 5638 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5639 "MOV $dst.hi,$dst.lo\n\t" 5640 "SAR $dst.hi,31" %} 5641 5642 ins_encode %{ 5643 __ movl($dst$$Register, $mem$$Address); 5644 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5645 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5646 %} 5647 5648 ins_pipe(ialu_reg_mem); 5649 %} 5650 5651 // Load Integer with mask 0xFF into Long Register 5652 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5653 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5654 effect(KILL cr); 5655 5656 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5657 "XOR $dst.hi,$dst.hi" %} 5658 ins_encode %{ 5659 Register Rdst = $dst$$Register; 5660 __ movzbl(Rdst, $mem$$Address); 5661 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5662 %} 5663 ins_pipe(ialu_reg_mem); 5664 %} 5665 5666 // Load Integer with mask 0xFFFF into Long Register 5667 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5668 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5669 effect(KILL cr); 5670 5671 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5672 "XOR $dst.hi,$dst.hi" %} 5673 ins_encode %{ 5674 Register Rdst = $dst$$Register; 5675 __ movzwl(Rdst, $mem$$Address); 5676 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5677 %} 5678 ins_pipe(ialu_reg_mem); 
5679 %} 5680 5681 // Load Integer with 31-bit mask into Long Register 5682 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ 5683 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5684 effect(KILL cr); 5685 5686 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" 5687 "XOR $dst.hi,$dst.hi\n\t" 5688 "AND $dst.lo,$mask" %} 5689 ins_encode %{ 5690 Register Rdst = $dst$$Register; 5691 __ movl(Rdst, $mem$$Address); 5692 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5693 __ andl(Rdst, $mask$$constant); 5694 %} 5695 ins_pipe(ialu_reg_mem); 5696 %} 5697 5698 // Load Unsigned Integer into Long Register 5699 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ 5700 match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); 5701 effect(KILL cr); 5702 5703 ins_cost(250); 5704 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" 5705 "XOR $dst.hi,$dst.hi" %} 5706 5707 ins_encode %{ 5708 __ movl($dst$$Register, $mem$$Address); 5709 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5710 %} 5711 5712 ins_pipe(ialu_reg_mem); 5713 %} 5714 5715 // Load Long. Cannot clobber address while loading, so restrict address 5716 // register to ESI 5717 instruct loadL(eRegL dst, load_long_memory mem) %{ 5718 predicate(!((LoadLNode*)n)->require_atomic_access()); 5719 match(Set dst (LoadL mem)); 5720 5721 ins_cost(250); 5722 format %{ "MOV $dst.lo,$mem\t# long\n\t" 5723 "MOV $dst.hi,$mem+4" %} 5724 5725 ins_encode %{ 5726 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); 5727 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); 5728 __ movl($dst$$Register, Amemlo); 5729 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); 5730 %} 5731 5732 ins_pipe(ialu_reg_long_mem); 5733 %} 5734 5735 // Volatile Load Long. Must be atomic, so do 64-bit FILD 5736 // then store it down to the stack and reload on the int 5737 // side. 
5738 instruct loadL_volatile(stackSlotL dst, memory mem) %{ 5739 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 5740 match(Set dst (LoadL mem)); 5741 5742 ins_cost(200); 5743 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 5744 "FISTp $dst" %} 5745 ins_encode(enc_loadL_volatile(mem,dst)); 5746 ins_pipe( fpu_reg_mem ); 5747 %} 5748 5749 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 5750 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5751 match(Set dst (LoadL mem)); 5752 effect(TEMP tmp); 5753 ins_cost(180); 5754 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5755 "MOVSD $dst,$tmp" %} 5756 ins_encode %{ 5757 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5758 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 5759 %} 5760 ins_pipe( pipe_slow ); 5761 %} 5762 5763 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 5764 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5765 match(Set dst (LoadL mem)); 5766 effect(TEMP tmp); 5767 ins_cost(160); 5768 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5769 "MOVD $dst.lo,$tmp\n\t" 5770 "PSRLQ $tmp,32\n\t" 5771 "MOVD $dst.hi,$tmp" %} 5772 ins_encode %{ 5773 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5774 __ movdl($dst$$Register, $tmp$$XMMRegister); 5775 __ psrlq($tmp$$XMMRegister, 32); 5776 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 5777 %} 5778 ins_pipe( pipe_slow ); 5779 %} 5780 5781 // Load Range 5782 instruct loadRange(rRegI dst, memory mem) %{ 5783 match(Set dst (LoadRange mem)); 5784 5785 ins_cost(125); 5786 format %{ "MOV $dst,$mem" %} 5787 opcode(0x8B); 5788 ins_encode( OpcP, RegMem(dst,mem)); 5789 ins_pipe( ialu_reg_mem ); 5790 %} 5791 5792 5793 // Load Pointer 5794 instruct loadP(eRegP dst, memory mem) %{ 5795 match(Set dst (LoadP mem)); 5796 5797 ins_cost(125); 5798 format %{ "MOV $dst,$mem" %} 5799 opcode(0x8B); 5800 ins_encode( OpcP, RegMem(dst,mem)); 5801 ins_pipe( 
ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double
// x87 path (no SSE2 for doubles): FLD pushes the 64-bit value from memory
// onto the FPU stack, FSTP pops it into the allocated stack register.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Variant chosen when UseXmmLoadAndClearUpper is off (MOVLPD leaves the
// upper half of the XMM register unchanged).
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 path, only when SSE is completely disabled)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
// One variant per addressing-mode operand so the matcher can pick the
// cheapest encoding; all use the same LEA opcode.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR is the short form for zeroing a register, but it clobbers the
// condition codes — hence the KILL of cr.
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);               /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);               /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// 64-bit constant is materialized as two 32-bit immediate moves.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// 64-bit stack slot is loaded as two 32-bit halves.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// Which variant is emitted is selected by the AllocatePrefetchInstr flag.

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short (0x66 operand-size prefix on the 32-bit MOV)
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long (non-atomic case only: two 32-bit halves)
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: bounce the stack-slot long through an XMM temp so the
// 64-bit store is a single atomic MOVSD.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant taking the long in a GPR pair: assemble the two 32-bit
// halves into one XMM register with PUNPCKLDQ, then store atomically.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);               /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double
// x87 path: source must be FPR1 (top-of-stack), stored with FST.
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float (x87 path, only when SSE is completely disabled)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// Folds a double-to-float conversion into the 32-bit FST.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// StoreLoad barrier; emitted via MacroAssembler::membar (flags clobbered,
// hence KILL cr). The format is a runtime template keyed on os::is_MP().
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// Int-to-pointer cast: same register (EAX) on both sides, so no code emitted.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// Jump-around emulation for CPUs without the CMOV instruction.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long cmove: one CMOV per 32-bit half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00);         /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// add-one folded to the single-byte INC (only when UseIncDec allows it).
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40);               /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: computes dst = src0 + src1 without
// touching the condition codes.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D);               /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D);               /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// add-minus-one folded to the single-byte DEC.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48);               /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00);          /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src"
%} 7088 opcode(0x03); 7089 ins_encode( OpcP, RegMem( dst, src) ); 7090 ins_pipe( ialu_reg_mem ); 7091 %} 7092 7093 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7094 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7095 effect(KILL cr); 7096 7097 ins_cost(150); 7098 format %{ "ADD $dst,$src" %} 7099 opcode(0x01); /* Opcode 01 /r */ 7100 ins_encode( OpcP, RegMem( src, dst ) ); 7101 ins_pipe( ialu_mem_reg ); 7102 %} 7103 7104 // Add Memory with Immediate 7105 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7106 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7107 effect(KILL cr); 7108 7109 ins_cost(125); 7110 format %{ "ADD $dst,$src" %} 7111 opcode(0x81); /* Opcode 81 /0 id */ 7112 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7113 ins_pipe( ialu_mem_imm ); 7114 %} 7115 7116 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7117 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7118 effect(KILL cr); 7119 7120 ins_cost(125); 7121 format %{ "INC $dst" %} 7122 opcode(0xFF); /* Opcode FF /0 */ 7123 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7124 ins_pipe( ialu_mem_imm ); 7125 %} 7126 7127 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7128 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7129 effect(KILL cr); 7130 7131 ins_cost(125); 7132 format %{ "DEC $dst" %} 7133 opcode(0xFF); /* Opcode FF /1 */ 7134 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7135 ins_pipe( ialu_mem_imm ); 7136 %} 7137 7138 7139 instruct checkCastPP( eRegP dst ) %{ 7140 match(Set dst (CheckCastPP dst)); 7141 7142 size(0); 7143 format %{ "#checkcastPP of $dst" %} 7144 ins_encode( /*empty encoding*/ ); 7145 ins_pipe( empty ); 7146 %} 7147 7148 instruct castPP( eRegP dst ) %{ 7149 match(Set dst (CastPP dst)); 7150 format %{ "#castPP of $dst" %} 7151 ins_encode( /*empty encoding*/ ); 7152 ins_pipe( empty ); 7153 %} 7154 7155 instruct castII( rRegI dst ) %{ 7156 match(Set dst (CastII dst)); 7157 format %{ "#castII of $dst" %} 
7158 ins_encode( /*empty encoding*/ ); 7159 ins_cost(0); 7160 ins_pipe( empty ); 7161 %} 7162 7163 7164 // Load-locked - same as a regular pointer load when used with compare-swap 7165 instruct loadPLocked(eRegP dst, memory mem) %{ 7166 match(Set dst (LoadPLocked mem)); 7167 7168 ins_cost(125); 7169 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7170 opcode(0x8B); 7171 ins_encode( OpcP, RegMem(dst,mem)); 7172 ins_pipe( ialu_reg_mem ); 7173 %} 7174 7175 // Conditional-store of the updated heap-top. 7176 // Used during allocation of the shared heap. 7177 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7178 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7179 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7180 // EAX is killed if there is contention, but then it's also unused. 7181 // In the common case of no contention, EAX holds the new oop address. 7182 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7183 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7184 ins_pipe( pipe_cmpxchg ); 7185 %} 7186 7187 // Conditional-store of an int value. 7188 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 7189 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ 7190 match(Set cr (StoreIConditional mem (Binary oldval newval))); 7191 effect(KILL oldval); 7192 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} 7193 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); 7194 ins_pipe( pipe_cmpxchg ); 7195 %} 7196 7197 // Conditional-store of a long value. 7198 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS: fixed register pairs EDX:EAX (expected) and ECX:EBX (new value),
// as required by CMPXCHG8B; res receives the success boolean.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI whose result is unused: a locked ADD suffices (no XADD needed).
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// XCHG with a memory operand is implicitly locked, so no lock() prefix here.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negation matched as (0 - dst).
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // The long constant operand must fit in a 32-bit signed int.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // The long constant operand must fit in a 32-bit signed int.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1
) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned int-to-long multiply: both operands zero-extended to 64 bits.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
// A single unsigned MUL suffices because both cross terms vanish.
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  // hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// Special-cases min_jint / -1, which would fault in IDIV (overflow).
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// Implemented as a runtime call (SharedRuntime::ldiv) — no 64-bit divide on ia32.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
7708 // convert value to positive to use unsigned division 7709 __ lneg($dst$$Register, $tmp2$$Register); 7710 __ divl($tmp$$Register); 7711 __ xchgl($dst$$Register, $tmp2$$Register); 7712 __ divl($tmp$$Register); 7713 // revert result back to negative 7714 __ lneg($tmp2$$Register, $dst$$Register); 7715 __ jmpb(Ldone); 7716 7717 __ bind(Lpos); 7718 __ divl($tmp$$Register); // Use unsigned division 7719 __ xchgl($dst$$Register, $tmp2$$Register); 7720 // Fallthrow for final divide, tmp2 has 32 bit hi result 7721 7722 __ bind(Lfast); 7723 // fast path: src is positive 7724 __ divl($tmp$$Register); // Use unsigned division 7725 7726 __ bind(Ldone); 7727 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7728 if (con < 0) { 7729 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7730 } 7731 %} 7732 ins_pipe( pipe_slow ); 7733 %} 7734 7735 // Remainder Register Long (remainder fit into 32 bits) 7736 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7737 match(Set dst (ModL dst imm)); 7738 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7739 ins_cost(1000); 7740 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7741 "CMP $tmp,EDX\n\t" 7742 "JA,s fast\n\t" 7743 "MOV $tmp2,EAX\n\t" 7744 "MOV EAX,EDX\n\t" 7745 "MOV EDX,0\n\t" 7746 "JLE,s pos\n\t" 7747 "LNEG EAX : $tmp2\n\t" 7748 "DIV $tmp # unsigned division\n\t" 7749 "MOV EAX,$tmp2\n\t" 7750 "DIV $tmp\n\t" 7751 "NEG EDX\n\t" 7752 "JMP,s done\n" 7753 "pos:\n\t" 7754 "DIV $tmp\n\t" 7755 "MOV EAX,$tmp2\n" 7756 "fast:\n\t" 7757 "DIV $tmp\n" 7758 "done:\n\t" 7759 "MOV EAX,EDX\n\t" 7760 "SAR EDX,31\n\t" %} 7761 ins_encode %{ 7762 int con = (int)$imm$$constant; 7763 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7764 int pcon = (con > 0) ? 
con : -con; 7765 Label Lfast, Lpos, Ldone; 7766 7767 __ movl($tmp$$Register, pcon); 7768 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7769 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7770 7771 __ movl($tmp2$$Register, $dst$$Register); // save 7772 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7773 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7774 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7775 7776 // Negative dividend. 7777 // convert value to positive to use unsigned division 7778 __ lneg($dst$$Register, $tmp2$$Register); 7779 __ divl($tmp$$Register); 7780 __ movl($dst$$Register, $tmp2$$Register); 7781 __ divl($tmp$$Register); 7782 // revert remainder back to negative 7783 __ negl(HIGH_FROM_LOW($dst$$Register)); 7784 __ jmpb(Ldone); 7785 7786 __ bind(Lpos); 7787 __ divl($tmp$$Register); 7788 __ movl($dst$$Register, $tmp2$$Register); 7789 7790 __ bind(Lfast); 7791 // fast path: src is positive 7792 __ divl($tmp$$Register); 7793 7794 __ bind(Ldone); 7795 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7796 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 7797 7798 %} 7799 ins_pipe( pipe_slow ); 7800 %} 7801 7802 // Integer Shift Instructions 7803 // Shift Left by one 7804 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7805 match(Set dst (LShiftI dst shift)); 7806 effect(KILL cr); 7807 7808 size(2); 7809 format %{ "SHL $dst,$shift" %} 7810 opcode(0xD1, 0x4); /* D1 /4 */ 7811 ins_encode( OpcP, RegOpc( dst ) ); 7812 ins_pipe( ialu_reg ); 7813 %} 7814 7815 // Shift Left by 8-bit immediate 7816 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7817 match(Set dst (LShiftI dst shift)); 7818 effect(KILL cr); 7819 7820 size(3); 7821 format %{ "SHL $dst,$shift" %} 7822 opcode(0xC1, 0x4); /* C1 /4 ib */ 7823 ins_encode( RegOpcImm( dst, shift) ); 7824 ins_pipe( ialu_reg ); 7825 %} 7826 7827 // Shift Left by variable 7828 instruct 
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7829 match(Set dst (LShiftI dst shift)); 7830 effect(KILL cr); 7831 7832 size(2); 7833 format %{ "SHL $dst,$shift" %} 7834 opcode(0xD3, 0x4); /* D3 /4 */ 7835 ins_encode( OpcP, RegOpc( dst ) ); 7836 ins_pipe( ialu_reg_reg ); 7837 %} 7838 7839 // Arithmetic shift right by one 7840 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7841 match(Set dst (RShiftI dst shift)); 7842 effect(KILL cr); 7843 7844 size(2); 7845 format %{ "SAR $dst,$shift" %} 7846 opcode(0xD1, 0x7); /* D1 /7 */ 7847 ins_encode( OpcP, RegOpc( dst ) ); 7848 ins_pipe( ialu_reg ); 7849 %} 7850 7851 // Arithmetic shift right by one 7852 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{ 7853 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7854 effect(KILL cr); 7855 format %{ "SAR $dst,$shift" %} 7856 opcode(0xD1, 0x7); /* D1 /7 */ 7857 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 7858 ins_pipe( ialu_mem_imm ); 7859 %} 7860 7861 // Arithmetic Shift Right by 8-bit immediate 7862 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7863 match(Set dst (RShiftI dst shift)); 7864 effect(KILL cr); 7865 7866 size(3); 7867 format %{ "SAR $dst,$shift" %} 7868 opcode(0xC1, 0x7); /* C1 /7 ib */ 7869 ins_encode( RegOpcImm( dst, shift ) ); 7870 ins_pipe( ialu_mem_imm ); 7871 %} 7872 7873 // Arithmetic Shift Right by 8-bit immediate 7874 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 7875 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7876 effect(KILL cr); 7877 7878 format %{ "SAR $dst,$shift" %} 7879 opcode(0xC1, 0x7); /* C1 /7 ib */ 7880 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 7881 ins_pipe( ialu_mem_imm ); 7882 %} 7883 7884 // Arithmetic Shift Right by variable 7885 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7886 match(Set dst (RShiftI dst shift)); 7887 effect(KILL cr); 7888 7889 size(2); 7890 format %{ "SAR $dst,$shift" %} 7891 
opcode(0xD3, 0x7); /* D3 /7 */ 7892 ins_encode( OpcP, RegOpc( dst ) ); 7893 ins_pipe( ialu_reg_reg ); 7894 %} 7895 7896 // Logical shift right by one 7897 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7898 match(Set dst (URShiftI dst shift)); 7899 effect(KILL cr); 7900 7901 size(2); 7902 format %{ "SHR $dst,$shift" %} 7903 opcode(0xD1, 0x5); /* D1 /5 */ 7904 ins_encode( OpcP, RegOpc( dst ) ); 7905 ins_pipe( ialu_reg ); 7906 %} 7907 7908 // Logical Shift Right by 8-bit immediate 7909 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7910 match(Set dst (URShiftI dst shift)); 7911 effect(KILL cr); 7912 7913 size(3); 7914 format %{ "SHR $dst,$shift" %} 7915 opcode(0xC1, 0x5); /* C1 /5 ib */ 7916 ins_encode( RegOpcImm( dst, shift) ); 7917 ins_pipe( ialu_reg ); 7918 %} 7919 7920 7921 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 7922 // This idiom is used by the compiler for the i2b bytecode. 7923 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 7924 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 7925 7926 size(3); 7927 format %{ "MOVSX $dst,$src :8" %} 7928 ins_encode %{ 7929 __ movsbl($dst$$Register, $src$$Register); 7930 %} 7931 ins_pipe(ialu_reg_reg); 7932 %} 7933 7934 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 7935 // This idiom is used by the compiler the i2s bytecode. 
// Sign-extend the low 16 bits via MOVSX (matched from the shift-pair idiom).
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = (~src1) & src2, matched from (AndI (XorI src1 -1) src2).
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: dst = src & (0 - src) — isolates the lowest set bit.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: dst = src ^ (src - 1) — mask up to and including the lowest set bit.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: dst = src & (src - 1) — clears the lowest set bit.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst
(OrI dst src)); 8146 effect(KILL cr); 8147 8148 size(2); 8149 format %{ "OR $dst,$src" %} 8150 opcode(0x0B); 8151 ins_encode( OpcP, RegReg( dst, src) ); 8152 ins_pipe( ialu_reg_reg ); 8153 %} 8154 8155 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8156 match(Set dst (OrI dst (CastP2X src))); 8157 effect(KILL cr); 8158 8159 size(2); 8160 format %{ "OR $dst,$src" %} 8161 opcode(0x0B); 8162 ins_encode( OpcP, RegReg( dst, src) ); 8163 ins_pipe( ialu_reg_reg ); 8164 %} 8165 8166 8167 // Or Register with Immediate 8168 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8169 match(Set dst (OrI dst src)); 8170 effect(KILL cr); 8171 8172 format %{ "OR $dst,$src" %} 8173 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8174 // ins_encode( RegImm( dst, src) ); 8175 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8176 ins_pipe( ialu_reg ); 8177 %} 8178 8179 // Or Register with Memory 8180 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8181 match(Set dst (OrI dst (LoadI src))); 8182 effect(KILL cr); 8183 8184 ins_cost(125); 8185 format %{ "OR $dst,$src" %} 8186 opcode(0x0B); 8187 ins_encode( OpcP, RegMem( dst, src) ); 8188 ins_pipe( ialu_reg_mem ); 8189 %} 8190 8191 // Or Memory with Register 8192 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8193 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8194 effect(KILL cr); 8195 8196 ins_cost(150); 8197 format %{ "OR $dst,$src" %} 8198 opcode(0x09); /* Opcode 09 /r */ 8199 ins_encode( OpcP, RegMem( src, dst ) ); 8200 ins_pipe( ialu_mem_reg ); 8201 %} 8202 8203 // Or Memory with Immediate 8204 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8205 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8206 effect(KILL cr); 8207 8208 ins_cost(125); 8209 format %{ "OR $dst,$src" %} 8210 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8211 // ins_encode( MemImm( dst, src) ); 8212 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8213 ins_pipe( ialu_mem_imm ); 
8214 %} 8215 8216 // ROL/ROR 8217 // ROL expand 8218 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8219 effect(USE_DEF dst, USE shift, KILL cr); 8220 8221 format %{ "ROL $dst, $shift" %} 8222 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8223 ins_encode( OpcP, RegOpc( dst )); 8224 ins_pipe( ialu_reg ); 8225 %} 8226 8227 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8228 effect(USE_DEF dst, USE shift, KILL cr); 8229 8230 format %{ "ROL $dst, $shift" %} 8231 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8232 ins_encode( RegOpcImm(dst, shift) ); 8233 ins_pipe(ialu_reg); 8234 %} 8235 8236 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8237 effect(USE_DEF dst, USE shift, KILL cr); 8238 8239 format %{ "ROL $dst, $shift" %} 8240 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8241 ins_encode(OpcP, RegOpc(dst)); 8242 ins_pipe( ialu_reg_reg ); 8243 %} 8244 // end of ROL expand 8245 8246 // ROL 32bit by one once 8247 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8248 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8249 8250 expand %{ 8251 rolI_eReg_imm1(dst, lshift, cr); 8252 %} 8253 %} 8254 8255 // ROL 32bit var by imm8 once 8256 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8257 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8258 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8259 8260 expand %{ 8261 rolI_eReg_imm8(dst, lshift, cr); 8262 %} 8263 %} 8264 8265 // ROL 32bit var by var once 8266 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8267 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8268 8269 expand %{ 8270 rolI_eReg_CL(dst, shift, cr); 8271 %} 8272 %} 8273 8274 // ROL 32bit var by var once 8275 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8276 match(Set dst ( OrI (LShiftI dst shift) (URShiftI 
dst (SubI c32 shift)))); 8277 8278 expand %{ 8279 rolI_eReg_CL(dst, shift, cr); 8280 %} 8281 %} 8282 8283 // ROR expand 8284 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8285 effect(USE_DEF dst, USE shift, KILL cr); 8286 8287 format %{ "ROR $dst, $shift" %} 8288 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8289 ins_encode( OpcP, RegOpc( dst ) ); 8290 ins_pipe( ialu_reg ); 8291 %} 8292 8293 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8294 effect (USE_DEF dst, USE shift, KILL cr); 8295 8296 format %{ "ROR $dst, $shift" %} 8297 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8298 ins_encode( RegOpcImm(dst, shift) ); 8299 ins_pipe( ialu_reg ); 8300 %} 8301 8302 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8303 effect(USE_DEF dst, USE shift, KILL cr); 8304 8305 format %{ "ROR $dst, $shift" %} 8306 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8307 ins_encode(OpcP, RegOpc(dst)); 8308 ins_pipe( ialu_reg_reg ); 8309 %} 8310 // end of ROR expand 8311 8312 // ROR right once 8313 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8314 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8315 8316 expand %{ 8317 rorI_eReg_imm1(dst, rshift, cr); 8318 %} 8319 %} 8320 8321 // ROR 32bit by immI8 once 8322 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8323 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8324 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8325 8326 expand %{ 8327 rorI_eReg_imm8(dst, rshift, cr); 8328 %} 8329 %} 8330 8331 // ROR 32bit var by var once 8332 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8333 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8334 8335 expand %{ 8336 rorI_eReg_CL(dst, shift, cr); 8337 %} 8338 %} 8339 8340 // ROR 32bit var by var once 8341 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, 
eFlagsReg cr) %{ 8342 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8343 8344 expand %{ 8345 rorI_eReg_CL(dst, shift, cr); 8346 %} 8347 %} 8348 8349 // Xor Instructions 8350 // Xor Register with Register 8351 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8352 match(Set dst (XorI dst src)); 8353 effect(KILL cr); 8354 8355 size(2); 8356 format %{ "XOR $dst,$src" %} 8357 opcode(0x33); 8358 ins_encode( OpcP, RegReg( dst, src) ); 8359 ins_pipe( ialu_reg_reg ); 8360 %} 8361 8362 // Xor Register with Immediate -1 8363 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8364 match(Set dst (XorI dst imm)); 8365 8366 size(2); 8367 format %{ "NOT $dst" %} 8368 ins_encode %{ 8369 __ notl($dst$$Register); 8370 %} 8371 ins_pipe( ialu_reg ); 8372 %} 8373 8374 // Xor Register with Immediate 8375 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8376 match(Set dst (XorI dst src)); 8377 effect(KILL cr); 8378 8379 format %{ "XOR $dst,$src" %} 8380 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8381 // ins_encode( RegImm( dst, src) ); 8382 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8383 ins_pipe( ialu_reg ); 8384 %} 8385 8386 // Xor Register with Memory 8387 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8388 match(Set dst (XorI dst (LoadI src))); 8389 effect(KILL cr); 8390 8391 ins_cost(125); 8392 format %{ "XOR $dst,$src" %} 8393 opcode(0x33); 8394 ins_encode( OpcP, RegMem(dst, src) ); 8395 ins_pipe( ialu_reg_mem ); 8396 %} 8397 8398 // Xor Memory with Register 8399 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8400 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8401 effect(KILL cr); 8402 8403 ins_cost(150); 8404 format %{ "XOR $dst,$src" %} 8405 opcode(0x31); /* Opcode 31 /r */ 8406 ins_encode( OpcP, RegMem( src, dst ) ); 8407 ins_pipe( ialu_mem_reg ); 8408 %} 8409 8410 // Xor Memory with Immediate 8411 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8412 match(Set dst (StoreI 
dst (XorI (LoadI dst) src))); 8413 effect(KILL cr); 8414 8415 ins_cost(125); 8416 format %{ "XOR $dst,$src" %} 8417 opcode(0x81,0x6); /* Opcode 81 /6 id */ 8418 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8419 ins_pipe( ialu_mem_imm ); 8420 %} 8421 8422 //----------Convert Int to Boolean--------------------------------------------- 8423 8424 instruct movI_nocopy(rRegI dst, rRegI src) %{ 8425 effect( DEF dst, USE src ); 8426 format %{ "MOV $dst,$src" %} 8427 ins_encode( enc_Copy( dst, src) ); 8428 ins_pipe( ialu_reg_reg ); 8429 %} 8430 8431 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8432 effect( USE_DEF dst, USE src, KILL cr ); 8433 8434 size(4); 8435 format %{ "NEG $dst\n\t" 8436 "ADC $dst,$src" %} 8437 ins_encode( neg_reg(dst), 8438 OpcRegReg(0x13,dst,src) ); 8439 ins_pipe( ialu_reg_reg_long ); 8440 %} 8441 8442 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8443 match(Set dst (Conv2B src)); 8444 8445 expand %{ 8446 movI_nocopy(dst,src); 8447 ci2b(dst,src,cr); 8448 %} 8449 %} 8450 8451 instruct movP_nocopy(rRegI dst, eRegP src) %{ 8452 effect( DEF dst, USE src ); 8453 format %{ "MOV $dst,$src" %} 8454 ins_encode( enc_Copy( dst, src) ); 8455 ins_pipe( ialu_reg_reg ); 8456 %} 8457 8458 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8459 effect( USE_DEF dst, USE src, KILL cr ); 8460 format %{ "NEG $dst\n\t" 8461 "ADC $dst,$src" %} 8462 ins_encode( neg_reg(dst), 8463 OpcRegReg(0x13,dst,src) ); 8464 ins_pipe( ialu_reg_reg_long ); 8465 %} 8466 8467 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8468 match(Set dst (Conv2B src)); 8469 8470 expand %{ 8471 movP_nocopy(dst,src); 8472 cp2b(dst,src,cr); 8473 %} 8474 %} 8475 8476 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ 8477 match(Set dst (CmpLTMask p q)); 8478 effect(KILL cr); 8479 ins_cost(400); 8480 8481 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 8482 format %{ "XOR $dst,$dst\n\t" 8483 "CMP 
$p,$q\n\t" 8484 "SETlt $dst\n\t" 8485 "NEG $dst" %} 8486 ins_encode %{ 8487 Register Rp = $p$$Register; 8488 Register Rq = $q$$Register; 8489 Register Rd = $dst$$Register; 8490 Label done; 8491 __ xorl(Rd, Rd); 8492 __ cmpl(Rp, Rq); 8493 __ setb(Assembler::less, Rd); 8494 __ negl(Rd); 8495 %} 8496 8497 ins_pipe(pipe_slow); 8498 %} 8499 8500 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{ 8501 match(Set dst (CmpLTMask dst zero)); 8502 effect(DEF dst, KILL cr); 8503 ins_cost(100); 8504 8505 format %{ "SAR $dst,31\t# cmpLTMask0" %} 8506 ins_encode %{ 8507 __ sarl($dst$$Register, 31); 8508 %} 8509 ins_pipe(ialu_reg); 8510 %} 8511 8512 /* better to save a register than avoid a branch */ 8513 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8514 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); 8515 effect(KILL cr); 8516 ins_cost(400); 8517 format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" 8518 "JGE done\n\t" 8519 "ADD $p,$y\n" 8520 "done: " %} 8521 ins_encode %{ 8522 Register Rp = $p$$Register; 8523 Register Rq = $q$$Register; 8524 Register Ry = $y$$Register; 8525 Label done; 8526 __ subl(Rp, Rq); 8527 __ jccb(Assembler::greaterEqual, done); 8528 __ addl(Rp, Ry); 8529 __ bind(done); 8530 %} 8531 8532 ins_pipe(pipe_cmplt); 8533 %} 8534 8535 /* better to save a register than avoid a branch */ 8536 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8537 match(Set y (AndI (CmpLTMask p q) y)); 8538 effect(KILL cr); 8539 8540 ins_cost(300); 8541 8542 format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" 8543 "JLT done\n\t" 8544 "XORL $y, $y\n" 8545 "done: " %} 8546 ins_encode %{ 8547 Register Rp = $p$$Register; 8548 Register Rq = $q$$Register; 8549 Register Ry = $y$$Register; 8550 Label done; 8551 __ cmpl(Rp, Rq); 8552 __ jccb(Assembler::less, done); 8553 __ xorl(Ry, Ry); 8554 __ bind(done); 8555 %} 8556 8557 ins_pipe(pipe_cmplt); 8558 %} 8559 8560 /* If I enable this, I encourage spilling in the inner loop of compress. 
8561 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ 8562 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 8563 */ 8564 //----------Overflow Math Instructions----------------------------------------- 8565 8566 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8567 %{ 8568 match(Set cr (OverflowAddI op1 op2)); 8569 effect(DEF cr, USE_KILL op1, USE op2); 8570 8571 format %{ "ADD $op1, $op2\t# overflow check int" %} 8572 8573 ins_encode %{ 8574 __ addl($op1$$Register, $op2$$Register); 8575 %} 8576 ins_pipe(ialu_reg_reg); 8577 %} 8578 8579 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) 8580 %{ 8581 match(Set cr (OverflowAddI op1 op2)); 8582 effect(DEF cr, USE_KILL op1, USE op2); 8583 8584 format %{ "ADD $op1, $op2\t# overflow check int" %} 8585 8586 ins_encode %{ 8587 __ addl($op1$$Register, $op2$$constant); 8588 %} 8589 ins_pipe(ialu_reg_reg); 8590 %} 8591 8592 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) 8593 %{ 8594 match(Set cr (OverflowSubI op1 op2)); 8595 8596 format %{ "CMP $op1, $op2\t# overflow check int" %} 8597 ins_encode %{ 8598 __ cmpl($op1$$Register, $op2$$Register); 8599 %} 8600 ins_pipe(ialu_reg_reg); 8601 %} 8602 8603 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) 8604 %{ 8605 match(Set cr (OverflowSubI op1 op2)); 8606 8607 format %{ "CMP $op1, $op2\t# overflow check int" %} 8608 ins_encode %{ 8609 __ cmpl($op1$$Register, $op2$$constant); 8610 %} 8611 ins_pipe(ialu_reg_reg); 8612 %} 8613 8614 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2) 8615 %{ 8616 match(Set cr (OverflowSubI zero op2)); 8617 effect(DEF cr, USE_KILL op2); 8618 8619 format %{ "NEG $op2\t# overflow check int" %} 8620 ins_encode %{ 8621 __ negl($op2$$Register); 8622 %} 8623 ins_pipe(ialu_reg_reg); 8624 %} 8625 8626 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8627 %{ 8628 match(Set cr (OverflowMulI op1 op2)); 8629 
effect(DEF cr, USE_KILL op1, USE op2); 8630 8631 format %{ "IMUL $op1, $op2\t# overflow check int" %} 8632 ins_encode %{ 8633 __ imull($op1$$Register, $op2$$Register); 8634 %} 8635 ins_pipe(ialu_reg_reg_alu0); 8636 %} 8637 8638 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) 8639 %{ 8640 match(Set cr (OverflowMulI op1 op2)); 8641 effect(DEF cr, TEMP tmp, USE op1, USE op2); 8642 8643 format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} 8644 ins_encode %{ 8645 __ imull($tmp$$Register, $op1$$Register, $op2$$constant); 8646 %} 8647 ins_pipe(ialu_reg_reg_alu0); 8648 %} 8649 8650 //----------Long Instructions------------------------------------------------ 8651 // Add Long Register with Register 8652 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8653 match(Set dst (AddL dst src)); 8654 effect(KILL cr); 8655 ins_cost(200); 8656 format %{ "ADD $dst.lo,$src.lo\n\t" 8657 "ADC $dst.hi,$src.hi" %} 8658 opcode(0x03, 0x13); 8659 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8660 ins_pipe( ialu_reg_reg_long ); 8661 %} 8662 8663 // Add Long Register with Immediate 8664 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8665 match(Set dst (AddL dst src)); 8666 effect(KILL cr); 8667 format %{ "ADD $dst.lo,$src.lo\n\t" 8668 "ADC $dst.hi,$src.hi" %} 8669 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */ 8670 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8671 ins_pipe( ialu_reg_long ); 8672 %} 8673 8674 // Add Long Register with Memory 8675 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8676 match(Set dst (AddL dst (LoadL mem))); 8677 effect(KILL cr); 8678 ins_cost(125); 8679 format %{ "ADD $dst.lo,$mem\n\t" 8680 "ADC $dst.hi,$mem+4" %} 8681 opcode(0x03, 0x13); 8682 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8683 ins_pipe( ialu_reg_long_mem ); 8684 %} 8685 8686 // Subtract Long Register with Register. 
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate a long, matched from (0 - dst).
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// 64-bit ANDN emulated with two 32-bit ANDNLs on the lo/hi halves.
// TEMP dst keeps the allocator from aliasing dst with either source while
// the two halves are written.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Address of the upper 32 bits of the in-memory long operand (disp + 4).
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSI (isolate lowest set bit): if BLSIL on the low half produces a
// non-zero result (JNZ), the high half of the result stays 0; otherwise the
// lowest set bit must be found in the high half.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // High word of the in-memory long operand.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSMSK (mask through lowest set bit): the carry out of the low-half
// BLSMSKL (JNC skips) decides whether the mask extends into the high half.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // High word of the in-memory long operand.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSR (clear lowest set bit): the high half is first copied through
// unchanged, then corrected only when the low half had no set bit (carry set
// after BLSRL, so JNC skips the high-half BLSRL).
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // High word of the in-memory long operand.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// xor with -1 is a bitwise complement; NOT leaves EFLAGS untouched, so no cr.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// Small constant shifts are strength-reduced (under UseNewLongLShift) to
// ADD/ADC pairs: ADD doubles the low half, ADC doubles the high half while
// folding in the carry out of the low half.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
// SHLD shifts bits from the low half into the high half, then SHL finishes
// the low half.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// Counts >= 32: move lo into hi, shift hi by cnt-32, clear lo.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
// Runtime count: test bit 5 of CL to pick the >=32 path (hi <- lo, lo <- 0)
// before the SHLD/SHL pair, since SHLD/SHL only use count mod 32.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// Arithmetic variant: hi is filled with copies of the sign bit (SAR hi,31).
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}


//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
"SAHF\n" 9245 "exit:\tNOP // avoid branch to branch" %} 9246 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9247 ins_encode( Push_Reg_DPR(src1), 9248 OpcP, RegOpc(src2), 9249 cmpF_P6_fixup ); 9250 ins_pipe( pipe_slow ); 9251 %} 9252 9253 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9254 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9255 match(Set cr (CmpD src1 src2)); 9256 ins_cost(150); 9257 format %{ "FLD $src1\n\t" 9258 "FUCOMIP ST,$src2 // P6 instruction" %} 9259 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9260 ins_encode( Push_Reg_DPR(src1), 9261 OpcP, RegOpc(src2)); 9262 ins_pipe( pipe_slow ); 9263 %} 9264 9265 // Compare & branch 9266 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9267 predicate(UseSSE<=1); 9268 match(Set cr (CmpD src1 src2)); 9269 effect(KILL rax); 9270 ins_cost(200); 9271 format %{ "FLD $src1\n\t" 9272 "FCOMp $src2\n\t" 9273 "FNSTSW AX\n\t" 9274 "TEST AX,0x400\n\t" 9275 "JZ,s flags\n\t" 9276 "MOV AH,1\t# unordered treat as LT\n" 9277 "flags:\tSAHF" %} 9278 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9279 ins_encode( Push_Reg_DPR(src1), 9280 OpcP, RegOpc(src2), 9281 fpu_flags); 9282 ins_pipe( pipe_slow ); 9283 %} 9284 9285 // Compare vs zero into -1,0,1 9286 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9287 predicate(UseSSE<=1); 9288 match(Set dst (CmpD3 src1 zero)); 9289 effect(KILL cr, KILL rax); 9290 ins_cost(280); 9291 format %{ "FTSTD $dst,$src1" %} 9292 opcode(0xE4, 0xD9); 9293 ins_encode( Push_Reg_DPR(src1), 9294 OpcS, OpcP, PopFPU, 9295 CmpF_Result(dst)); 9296 ins_pipe( pipe_slow ); 9297 %} 9298 9299 // Compare into -1,0,1 9300 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9301 predicate(UseSSE<=1); 9302 match(Set dst (CmpD3 src1 src2)); 9303 effect(KILL cr, KILL rax); 9304 ins_cost(300); 9305 format %{ "FCMPD $dst,$src1,$src2" %} 9306 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9307 ins_encode( 
Push_Reg_DPR(src1), 9308 OpcP, RegOpc(src2), 9309 CmpF_Result(dst)); 9310 ins_pipe( pipe_slow ); 9311 %} 9312 9313 // float compare and set condition codes in EFLAGS by XMM regs 9314 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9315 predicate(UseSSE>=2); 9316 match(Set cr (CmpD src1 src2)); 9317 ins_cost(145); 9318 format %{ "UCOMISD $src1,$src2\n\t" 9319 "JNP,s exit\n\t" 9320 "PUSHF\t# saw NaN, set CF\n\t" 9321 "AND [rsp], #0xffffff2b\n\t" 9322 "POPF\n" 9323 "exit:" %} 9324 ins_encode %{ 9325 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9326 emit_cmpfp_fixup(_masm); 9327 %} 9328 ins_pipe( pipe_slow ); 9329 %} 9330 9331 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9332 predicate(UseSSE>=2); 9333 match(Set cr (CmpD src1 src2)); 9334 ins_cost(100); 9335 format %{ "UCOMISD $src1,$src2" %} 9336 ins_encode %{ 9337 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9338 %} 9339 ins_pipe( pipe_slow ); 9340 %} 9341 9342 // float compare and set condition codes in EFLAGS by XMM regs 9343 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9344 predicate(UseSSE>=2); 9345 match(Set cr (CmpD src1 (LoadD src2))); 9346 ins_cost(145); 9347 format %{ "UCOMISD $src1,$src2\n\t" 9348 "JNP,s exit\n\t" 9349 "PUSHF\t# saw NaN, set CF\n\t" 9350 "AND [rsp], #0xffffff2b\n\t" 9351 "POPF\n" 9352 "exit:" %} 9353 ins_encode %{ 9354 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9355 emit_cmpfp_fixup(_masm); 9356 %} 9357 ins_pipe( pipe_slow ); 9358 %} 9359 9360 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9361 predicate(UseSSE>=2); 9362 match(Set cr (CmpD src1 (LoadD src2))); 9363 ins_cost(100); 9364 format %{ "UCOMISD $src1,$src2" %} 9365 ins_encode %{ 9366 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9367 %} 9368 ins_pipe( pipe_slow ); 9369 %} 9370 9371 // Compare into -1,0,1 in XMM 9372 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9373 predicate(UseSSE>=2); 9374 match(Set dst (CmpD3 src1 src2)); 
9375 effect(KILL cr); 9376 ins_cost(255); 9377 format %{ "UCOMISD $src1, $src2\n\t" 9378 "MOV $dst, #-1\n\t" 9379 "JP,s done\n\t" 9380 "JB,s done\n\t" 9381 "SETNE $dst\n\t" 9382 "MOVZB $dst, $dst\n" 9383 "done:" %} 9384 ins_encode %{ 9385 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9386 emit_cmpfp3(_masm, $dst$$Register); 9387 %} 9388 ins_pipe( pipe_slow ); 9389 %} 9390 9391 // Compare into -1,0,1 in XMM and memory 9392 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9393 predicate(UseSSE>=2); 9394 match(Set dst (CmpD3 src1 (LoadD src2))); 9395 effect(KILL cr); 9396 ins_cost(275); 9397 format %{ "UCOMISD $src1, $src2\n\t" 9398 "MOV $dst, #-1\n\t" 9399 "JP,s done\n\t" 9400 "JB,s done\n\t" 9401 "SETNE $dst\n\t" 9402 "MOVZB $dst, $dst\n" 9403 "done:" %} 9404 ins_encode %{ 9405 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9406 emit_cmpfp3(_masm, $dst$$Register); 9407 %} 9408 ins_pipe( pipe_slow ); 9409 %} 9410 9411 9412 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9413 predicate (UseSSE <=1); 9414 match(Set dst (SubD dst src)); 9415 9416 format %{ "FLD $src\n\t" 9417 "DSUBp $dst,ST" %} 9418 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9419 ins_cost(150); 9420 ins_encode( Push_Reg_DPR(src), 9421 OpcP, RegOpc(dst) ); 9422 ins_pipe( fpu_reg_reg ); 9423 %} 9424 9425 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9426 predicate (UseSSE <=1); 9427 match(Set dst (RoundDouble (SubD src1 src2))); 9428 ins_cost(250); 9429 9430 format %{ "FLD $src2\n\t" 9431 "DSUB ST,$src1\n\t" 9432 "FSTP_D $dst\t# D-round" %} 9433 opcode(0xD8, 0x5); 9434 ins_encode( Push_Reg_DPR(src2), 9435 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9436 ins_pipe( fpu_mem_reg_reg ); 9437 %} 9438 9439 9440 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9441 predicate (UseSSE <=1); 9442 match(Set dst (SubD dst (LoadD src))); 9443 ins_cost(150); 9444 9445 format %{ "FLD $src\n\t" 9446 "DSUBp $dst,ST" %} 9447 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9448 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9449 OpcP, RegOpc(dst) ); 9450 ins_pipe( fpu_reg_mem ); 9451 %} 9452 9453 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9454 predicate (UseSSE<=1); 9455 match(Set dst (AbsD src)); 9456 ins_cost(100); 9457 format %{ "FABS" %} 9458 opcode(0xE1, 0xD9); 9459 ins_encode( OpcS, OpcP ); 9460 ins_pipe( fpu_reg_reg ); 9461 %} 9462 9463 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9464 predicate(UseSSE<=1); 9465 match(Set dst (NegD src)); 9466 ins_cost(100); 9467 format %{ "FCHS" %} 9468 opcode(0xE0, 0xD9); 9469 ins_encode( OpcS, OpcP ); 9470 ins_pipe( fpu_reg_reg ); 9471 %} 9472 9473 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9474 predicate(UseSSE<=1); 9475 match(Set dst (AddD dst src)); 9476 format %{ "FLD $src\n\t" 9477 "DADD $dst,ST" %} 9478 size(4); 9479 ins_cost(150); 9480 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9481 ins_encode( Push_Reg_DPR(src), 9482 OpcP, RegOpc(dst) ); 9483 ins_pipe( fpu_reg_reg ); 9484 %} 9485 9486 9487 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9488 predicate(UseSSE<=1); 9489 match(Set dst (RoundDouble (AddD src1 src2))); 9490 ins_cost(250); 9491 9492 format %{ "FLD $src2\n\t" 9493 "DADD ST,$src1\n\t" 9494 "FSTP_D $dst\t# D-round" %} 9495 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9496 ins_encode( Push_Reg_DPR(src2), 9497 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9498 ins_pipe( fpu_mem_reg_reg ); 9499 %} 9500 9501 9502 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9503 predicate(UseSSE<=1); 9504 match(Set dst (AddD dst (LoadD src))); 9505 ins_cost(150); 9506 9507 format %{ "FLD $src\n\t" 9508 "DADDp $dst,ST" %} 9509 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9510 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9511 OpcP, RegOpc(dst) ); 9512 ins_pipe( fpu_reg_mem ); 9513 %} 9514 9515 // add-to-memory 9516 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9517 predicate(UseSSE<=1); 9518 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9519 ins_cost(150); 9520 9521 format %{ "FLD_D $dst\n\t" 9522 "DADD ST,$src\n\t" 9523 "FST_D $dst" %} 9524 opcode(0xDD, 0x0); 9525 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9526 Opcode(0xD8), RegOpc(src), 9527 set_instruction_start, 9528 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9529 ins_pipe( fpu_reg_mem ); 9530 %} 9531 9532 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9533 predicate(UseSSE<=1); 9534 match(Set dst (AddD dst con)); 9535 ins_cost(125); 9536 format %{ "FLD1\n\t" 9537 "DADDp $dst,ST" %} 9538 ins_encode %{ 9539 __ fld1(); 9540 __ faddp($dst$$reg); 9541 %} 9542 ins_pipe(fpu_reg); 9543 %} 9544 9545 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9546 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9547 match(Set dst (AddD dst con)); 9548 ins_cost(200); 9549 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9550 "DADDp $dst,ST" %} 9551 ins_encode %{ 9552 __ fld_d($constantaddress($con)); 9553 __ faddp($dst$$reg); 9554 %} 9555 ins_pipe(fpu_reg_mem); 9556 %} 9557 9558 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9559 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9560 match(Set dst (RoundDouble (AddD src con))); 9561 ins_cost(200); 9562 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9563 "DADD ST,$src\n\t" 9564 "FSTP_D $dst\t# D-round" %} 9565 ins_encode %{ 9566 __ fld_d($constantaddress($con)); 9567 __ fadd($src$$reg); 9568 __ fstp_d(Address(rsp, $dst$$disp)); 9569 %} 9570 ins_pipe(fpu_mem_reg_con); 9571 %} 9572 9573 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9574 predicate(UseSSE<=1); 9575 match(Set dst (MulD dst src)); 9576 format %{ "FLD $src\n\t" 9577 "DMULp $dst,ST" %} 9578 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9579 ins_cost(150); 9580 ins_encode( Push_Reg_DPR(src), 9581 OpcP, RegOpc(dst) ); 9582 ins_pipe( 
fpu_reg_reg ); 9583 %} 9584 9585 // Strict FP instruction biases argument before multiply then 9586 // biases result to avoid double rounding of subnormals. 9587 // 9588 // scale arg1 by multiplying arg1 by 2^(-15360) 9589 // load arg2 9590 // multiply scaled arg1 by arg2 9591 // rescale product by 2^(15360) 9592 // 9593 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9594 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9595 match(Set dst (MulD dst src)); 9596 ins_cost(1); // Select this instruction for all strict FP double multiplies 9597 9598 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9599 "DMULp $dst,ST\n\t" 9600 "FLD $src\n\t" 9601 "DMULp $dst,ST\n\t" 9602 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9603 "DMULp $dst,ST\n\t" %} 9604 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9605 ins_encode( strictfp_bias1(dst), 9606 Push_Reg_DPR(src), 9607 OpcP, RegOpc(dst), 9608 strictfp_bias2(dst) ); 9609 ins_pipe( fpu_reg_reg ); 9610 %} 9611 9612 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9613 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9614 match(Set dst (MulD dst con)); 9615 ins_cost(200); 9616 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9617 "DMULp $dst,ST" %} 9618 ins_encode %{ 9619 __ fld_d($constantaddress($con)); 9620 __ fmulp($dst$$reg); 9621 %} 9622 ins_pipe(fpu_reg_mem); 9623 %} 9624 9625 9626 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9627 predicate( UseSSE<=1 ); 9628 match(Set dst (MulD dst (LoadD src))); 9629 ins_cost(200); 9630 format %{ "FLD_D $src\n\t" 9631 "DMULp $dst,ST" %} 9632 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9633 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9634 OpcP, RegOpc(dst) ); 9635 ins_pipe( fpu_reg_mem ); 9636 %} 9637 9638 // 9639 // Cisc-alternate to reg-reg multiply 9640 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9641 predicate( UseSSE<=1 ); 9642 match(Set dst (MulD src (LoadD mem))); 9643 ins_cost(250); 9644 format %{ "FLD_D $mem\n\t" 9645 "DMUL ST,$src\n\t" 9646 "FSTP_D $dst" %} 9647 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9648 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9649 OpcReg_FPR(src), 9650 Pop_Reg_DPR(dst) ); 9651 ins_pipe( fpu_reg_reg_mem ); 9652 %} 9653 9654 9655 // MACRO3 -- addDPR a mulDPR 9656 // This instruction is a '2-address' instruction in that the result goes 9657 // back to src2. This eliminates a move from the macro; possibly the 9658 // register allocator will have to add it back (and maybe not). 9659 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9660 predicate( UseSSE<=1 ); 9661 match(Set src2 (AddD (MulD src0 src1) src2)); 9662 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9663 "DMUL ST,$src1\n\t" 9664 "DADDp $src2,ST" %} 9665 ins_cost(250); 9666 opcode(0xDD); /* LoadD DD /0 */ 9667 ins_encode( Push_Reg_FPR(src0), 9668 FMul_ST_reg(src1), 9669 FAddP_reg_ST(src2) ); 9670 ins_pipe( fpu_reg_reg_reg ); 9671 %} 9672 9673 9674 // MACRO3 -- subDPR a mulDPR 9675 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9676 predicate( UseSSE<=1 ); 9677 match(Set src2 (SubD (MulD src0 src1) src2)); 9678 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9679 "DMUL ST,$src1\n\t" 9680 "DSUBRp $src2,ST" %} 9681 ins_cost(250); 9682 ins_encode( Push_Reg_FPR(src0), 9683 FMul_ST_reg(src1), 9684 Opcode(0xDE), Opc_plus(0xE0,src2)); 9685 ins_pipe( fpu_reg_reg_reg ); 9686 %} 9687 9688 9689 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9690 predicate( UseSSE<=1 ); 9691 match(Set dst (DivD dst src)); 9692 9693 format %{ "FLD $src\n\t" 9694 "FDIVp $dst,ST" %} 9695 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9696 ins_cost(150); 9697 ins_encode( Push_Reg_DPR(src), 9698 OpcP, RegOpc(dst) ); 9699 ins_pipe( fpu_reg_reg ); 9700 %} 9701 9702 // Strict FP instruction biases argument before division then 9703 // biases 
result, to avoid double rounding of subnormals. 9704 // 9705 // scale dividend by multiplying dividend by 2^(-15360) 9706 // load divisor 9707 // divide scaled dividend by divisor 9708 // rescale quotient by 2^(15360) 9709 // 9710 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9711 predicate (UseSSE<=1); 9712 match(Set dst (DivD dst src)); 9713 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9714 ins_cost(01); 9715 9716 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9717 "DMULp $dst,ST\n\t" 9718 "FLD $src\n\t" 9719 "FDIVp $dst,ST\n\t" 9720 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9721 "DMULp $dst,ST\n\t" %} 9722 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9723 ins_encode( strictfp_bias1(dst), 9724 Push_Reg_DPR(src), 9725 OpcP, RegOpc(dst), 9726 strictfp_bias2(dst) ); 9727 ins_pipe( fpu_reg_reg ); 9728 %} 9729 9730 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9731 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 9732 match(Set dst (RoundDouble (DivD src1 src2))); 9733 9734 format %{ "FLD $src1\n\t" 9735 "FDIV ST,$src2\n\t" 9736 "FSTP_D $dst\t# D-round" %} 9737 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 9738 ins_encode( Push_Reg_DPR(src1), 9739 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); 9740 ins_pipe( fpu_mem_reg_reg ); 9741 %} 9742 9743 9744 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 9745 predicate(UseSSE<=1); 9746 match(Set dst (ModD dst src)); 9747 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 9748 9749 format %{ "DMOD $dst,$src" %} 9750 ins_cost(250); 9751 ins_encode(Push_Reg_Mod_DPR(dst, src), 9752 emitModDPR(), 9753 Push_Result_Mod_DPR(src), 9754 Pop_Reg_DPR(dst)); 9755 ins_pipe( pipe_slow ); 9756 %} 9757 9758 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 9759 predicate(UseSSE>=2); 9760 match(Set dst (ModD src0 src1)); 
9761 effect(KILL rax, KILL cr); 9762 9763 format %{ "SUB ESP,8\t # DMOD\n" 9764 "\tMOVSD [ESP+0],$src1\n" 9765 "\tFLD_D [ESP+0]\n" 9766 "\tMOVSD [ESP+0],$src0\n" 9767 "\tFLD_D [ESP+0]\n" 9768 "loop:\tFPREM\n" 9769 "\tFWAIT\n" 9770 "\tFNSTSW AX\n" 9771 "\tSAHF\n" 9772 "\tJP loop\n" 9773 "\tFSTP_D [ESP+0]\n" 9774 "\tMOVSD $dst,[ESP+0]\n" 9775 "\tADD ESP,8\n" 9776 "\tFSTP ST0\t # Restore FPU Stack" 9777 %} 9778 ins_cost(250); 9779 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9780 ins_pipe( pipe_slow ); 9781 %} 9782 9783 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{ 9784 predicate (UseSSE<=1); 9785 match(Set dst (SinD src)); 9786 ins_cost(1800); 9787 format %{ "DSIN $dst" %} 9788 opcode(0xD9, 0xFE); 9789 ins_encode( OpcP, OpcS ); 9790 ins_pipe( pipe_slow ); 9791 %} 9792 9793 instruct sinD_reg(regD dst, eFlagsReg cr) %{ 9794 predicate (UseSSE>=2); 9795 match(Set dst (SinD dst)); 9796 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9797 ins_cost(1800); 9798 format %{ "DSIN $dst" %} 9799 opcode(0xD9, 0xFE); 9800 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9801 ins_pipe( pipe_slow ); 9802 %} 9803 9804 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{ 9805 predicate (UseSSE<=1); 9806 match(Set dst (CosD src)); 9807 ins_cost(1800); 9808 format %{ "DCOS $dst" %} 9809 opcode(0xD9, 0xFF); 9810 ins_encode( OpcP, OpcS ); 9811 ins_pipe( pipe_slow ); 9812 %} 9813 9814 instruct cosD_reg(regD dst, eFlagsReg cr) %{ 9815 predicate (UseSSE>=2); 9816 match(Set dst (CosD dst)); 9817 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9818 ins_cost(1800); 9819 format %{ "DCOS $dst" %} 9820 opcode(0xD9, 0xFF); 9821 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9822 ins_pipe( pipe_slow ); 9823 %} 9824 9825 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ 9826 predicate (UseSSE<=1); 9827 match(Set dst(TanD src)); 9828 format %{ "DTAN $dst" %} 9829 ins_encode( Opcode(0xD9), Opcode(0xF2), // 
fptan 9830 Opcode(0xDD), Opcode(0xD8)); // fstp st 9831 ins_pipe( pipe_slow ); 9832 %} 9833 9834 instruct tanD_reg(regD dst, eFlagsReg cr) %{ 9835 predicate (UseSSE>=2); 9836 match(Set dst(TanD dst)); 9837 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9838 format %{ "DTAN $dst" %} 9839 ins_encode( Push_SrcD(dst), 9840 Opcode(0xD9), Opcode(0xF2), // fptan 9841 Opcode(0xDD), Opcode(0xD8), // fstp st 9842 Push_ResultD(dst) ); 9843 ins_pipe( pipe_slow ); 9844 %} 9845 9846 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9847 predicate (UseSSE<=1); 9848 match(Set dst(AtanD dst src)); 9849 format %{ "DATA $dst,$src" %} 9850 opcode(0xD9, 0xF3); 9851 ins_encode( Push_Reg_DPR(src), 9852 OpcP, OpcS, RegOpc(dst) ); 9853 ins_pipe( pipe_slow ); 9854 %} 9855 9856 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9857 predicate (UseSSE>=2); 9858 match(Set dst(AtanD dst src)); 9859 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9860 format %{ "DATA $dst,$src" %} 9861 opcode(0xD9, 0xF3); 9862 ins_encode( Push_SrcD(src), 9863 OpcP, OpcS, Push_ResultD(dst) ); 9864 ins_pipe( pipe_slow ); 9865 %} 9866 9867 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9868 predicate (UseSSE<=1); 9869 match(Set dst (SqrtD src)); 9870 format %{ "DSQRT $dst,$src" %} 9871 opcode(0xFA, 0xD9); 9872 ins_encode( Push_Reg_DPR(src), 9873 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9874 ins_pipe( pipe_slow ); 9875 %} 9876 9877 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9878 predicate (UseSSE<=1); 9879 match(Set Y (PowD X Y)); // Raise X to the Yth power 9880 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9881 format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %} 9882 ins_encode %{ 9883 __ subptr(rsp, 8); 9884 __ fld_s($X$$reg - 1); 9885 __ fast_pow(); 9886 __ addptr(rsp, 8); 9887 %} 9888 ins_pipe( pipe_slow ); 9889 %} 9890 9891 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, 
eFlagsReg cr) %{ 9892 predicate (UseSSE>=2); 9893 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 9894 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9895 format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %} 9896 ins_encode %{ 9897 __ subptr(rsp, 8); 9898 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 9899 __ fld_d(Address(rsp, 0)); 9900 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 9901 __ fld_d(Address(rsp, 0)); 9902 __ fast_pow(); 9903 __ fstp_d(Address(rsp, 0)); 9904 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 9905 __ addptr(rsp, 8); 9906 %} 9907 ins_pipe( pipe_slow ); 9908 %} 9909 9910 9911 instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9912 predicate (UseSSE<=1); 9913 match(Set dpr1 (ExpD dpr1)); 9914 effect(KILL rax, KILL rcx, KILL rdx, KILL cr); 9915 format %{ "fast_exp $dpr1 -> $dpr1 // KILL $rax, $rcx, $rdx" %} 9916 ins_encode %{ 9917 __ fast_exp(); 9918 %} 9919 ins_pipe( pipe_slow ); 9920 %} 9921 9922 instruct expD_reg(regD dst, regD src, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9923 predicate (UseSSE>=2); 9924 match(Set dst (ExpD src)); 9925 effect(KILL rax, KILL rcx, KILL rdx, KILL cr); 9926 format %{ "fast_exp $dst -> $src // KILL $rax, $rcx, $rdx" %} 9927 ins_encode %{ 9928 __ subptr(rsp, 8); 9929 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 9930 __ fld_d(Address(rsp, 0)); 9931 __ fast_exp(); 9932 __ fstp_d(Address(rsp, 0)); 9933 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 9934 __ addptr(rsp, 8); 9935 %} 9936 ins_pipe( pipe_slow ); 9937 %} 9938 9939 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ 9940 predicate (UseSSE<=1); 9941 // The source Double operand on FPU stack 9942 match(Set dst (Log10D src)); 9943 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9944 // fxch ; swap ST(0) with ST(1) 9945 // fyl2x ; compute log_10(2) * log_2(x) 9946 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9947 "FXCH \n\t" 9948 "FYL2X \t\t\t# 
Q=Log10*Log_2(x)" 9949 %} 9950 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9951 Opcode(0xD9), Opcode(0xC9), // fxch 9952 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9953 9954 ins_pipe( pipe_slow ); 9955 %} 9956 9957 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ 9958 predicate (UseSSE>=2); 9959 effect(KILL cr); 9960 match(Set dst (Log10D src)); 9961 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9962 // fyl2x ; compute log_10(2) * log_2(x) 9963 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9964 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9965 %} 9966 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9967 Push_SrcD(src), 9968 Opcode(0xD9), Opcode(0xF1), // fyl2x 9969 Push_ResultD(dst)); 9970 9971 ins_pipe( pipe_slow ); 9972 %} 9973 9974 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{ 9975 predicate (UseSSE<=1); 9976 // The source Double operand on FPU stack 9977 match(Set dst (LogD src)); 9978 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 9979 // fxch ; swap ST(0) with ST(1) 9980 // fyl2x ; compute log_e(2) * log_2(x) 9981 format %{ "FLDLN2 \t\t\t#Log_e\n\t" 9982 "FXCH \n\t" 9983 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 9984 %} 9985 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 9986 Opcode(0xD9), Opcode(0xC9), // fxch 9987 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9988 9989 ins_pipe( pipe_slow ); 9990 %} 9991 9992 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{ 9993 predicate (UseSSE>=2); 9994 effect(KILL cr); 9995 // The source and result Double operands in XMM registers 9996 match(Set dst (LogD src)); 9997 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 9998 // fyl2x ; compute log_e(2) * log_2(x) 9999 format %{ "FLDLN2 \t\t\t#Log_e\n\t" 10000 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 10001 %} 10002 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 10003 Push_SrcD(src), 10004 Opcode(0xD9), Opcode(0xF1), // fyl2x 10005 Push_ResultD(dst)); 10006 ins_pipe( pipe_slow ); 10007 %} 10008 10009 //-------------Float 
Instructions------------------------------- 10010 // Float Math 10011 10012 // Code for float compare: 10013 // fcompp(); 10014 // fwait(); fnstsw_ax(); 10015 // sahf(); 10016 // movl(dst, unordered_result); 10017 // jcc(Assembler::parity, exit); 10018 // movl(dst, less_result); 10019 // jcc(Assembler::below, exit); 10020 // movl(dst, equal_result); 10021 // jcc(Assembler::equal, exit); 10022 // movl(dst, greater_result); 10023 // exit: 10024 10025 // P6 version of float compare, sets condition codes in EFLAGS 10026 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10027 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10028 match(Set cr (CmpF src1 src2)); 10029 effect(KILL rax); 10030 ins_cost(150); 10031 format %{ "FLD $src1\n\t" 10032 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10033 "JNP exit\n\t" 10034 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10035 "SAHF\n" 10036 "exit:\tNOP // avoid branch to branch" %} 10037 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10038 ins_encode( Push_Reg_DPR(src1), 10039 OpcP, RegOpc(src2), 10040 cmpF_P6_fixup ); 10041 ins_pipe( pipe_slow ); 10042 %} 10043 10044 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10045 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10046 match(Set cr (CmpF src1 src2)); 10047 ins_cost(100); 10048 format %{ "FLD $src1\n\t" 10049 "FUCOMIP ST,$src2 // P6 instruction" %} 10050 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10051 ins_encode( Push_Reg_DPR(src1), 10052 OpcP, RegOpc(src2)); 10053 ins_pipe( pipe_slow ); 10054 %} 10055 10056 10057 // Compare & branch 10058 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10059 predicate(UseSSE == 0); 10060 match(Set cr (CmpF src1 src2)); 10061 effect(KILL rax); 10062 ins_cost(200); 10063 format %{ "FLD $src1\n\t" 10064 "FCOMp $src2\n\t" 10065 "FNSTSW AX\n\t" 10066 "TEST AX,0x400\n\t" 10067 "JZ,s flags\n\t" 10068 "MOV AH,1\t# unordered treat as LT\n" 10069 "flags:\tSAHF" %} 
10070 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10071 ins_encode( Push_Reg_DPR(src1), 10072 OpcP, RegOpc(src2), 10073 fpu_flags); 10074 ins_pipe( pipe_slow ); 10075 %} 10076 10077 // Compare vs zero into -1,0,1 10078 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10079 predicate(UseSSE == 0); 10080 match(Set dst (CmpF3 src1 zero)); 10081 effect(KILL cr, KILL rax); 10082 ins_cost(280); 10083 format %{ "FTSTF $dst,$src1" %} 10084 opcode(0xE4, 0xD9); 10085 ins_encode( Push_Reg_DPR(src1), 10086 OpcS, OpcP, PopFPU, 10087 CmpF_Result(dst)); 10088 ins_pipe( pipe_slow ); 10089 %} 10090 10091 // Compare into -1,0,1 10092 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10093 predicate(UseSSE == 0); 10094 match(Set dst (CmpF3 src1 src2)); 10095 effect(KILL cr, KILL rax); 10096 ins_cost(300); 10097 format %{ "FCMPF $dst,$src1,$src2" %} 10098 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10099 ins_encode( Push_Reg_DPR(src1), 10100 OpcP, RegOpc(src2), 10101 CmpF_Result(dst)); 10102 ins_pipe( pipe_slow ); 10103 %} 10104 10105 // float compare and set condition codes in EFLAGS by XMM regs 10106 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10107 predicate(UseSSE>=1); 10108 match(Set cr (CmpF src1 src2)); 10109 ins_cost(145); 10110 format %{ "UCOMISS $src1,$src2\n\t" 10111 "JNP,s exit\n\t" 10112 "PUSHF\t# saw NaN, set CF\n\t" 10113 "AND [rsp], #0xffffff2b\n\t" 10114 "POPF\n" 10115 "exit:" %} 10116 ins_encode %{ 10117 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10118 emit_cmpfp_fixup(_masm); 10119 %} 10120 ins_pipe( pipe_slow ); 10121 %} 10122 10123 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10124 predicate(UseSSE>=1); 10125 match(Set cr (CmpF src1 src2)); 10126 ins_cost(100); 10127 format %{ "UCOMISS $src1,$src2" %} 10128 ins_encode %{ 10129 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10130 %} 10131 ins_pipe( pipe_slow ); 10132 %} 10133 10134 // float compare and 
set condition codes in EFLAGS by XMM regs 10135 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10136 predicate(UseSSE>=1); 10137 match(Set cr (CmpF src1 (LoadF src2))); 10138 ins_cost(165); 10139 format %{ "UCOMISS $src1,$src2\n\t" 10140 "JNP,s exit\n\t" 10141 "PUSHF\t# saw NaN, set CF\n\t" 10142 "AND [rsp], #0xffffff2b\n\t" 10143 "POPF\n" 10144 "exit:" %} 10145 ins_encode %{ 10146 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10147 emit_cmpfp_fixup(_masm); 10148 %} 10149 ins_pipe( pipe_slow ); 10150 %} 10151 10152 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10153 predicate(UseSSE>=1); 10154 match(Set cr (CmpF src1 (LoadF src2))); 10155 ins_cost(100); 10156 format %{ "UCOMISS $src1,$src2" %} 10157 ins_encode %{ 10158 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10159 %} 10160 ins_pipe( pipe_slow ); 10161 %} 10162 10163 // Compare into -1,0,1 in XMM 10164 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10165 predicate(UseSSE>=1); 10166 match(Set dst (CmpF3 src1 src2)); 10167 effect(KILL cr); 10168 ins_cost(255); 10169 format %{ "UCOMISS $src1, $src2\n\t" 10170 "MOV $dst, #-1\n\t" 10171 "JP,s done\n\t" 10172 "JB,s done\n\t" 10173 "SETNE $dst\n\t" 10174 "MOVZB $dst, $dst\n" 10175 "done:" %} 10176 ins_encode %{ 10177 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10178 emit_cmpfp3(_masm, $dst$$Register); 10179 %} 10180 ins_pipe( pipe_slow ); 10181 %} 10182 10183 // Compare into -1,0,1 in XMM and memory 10184 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10185 predicate(UseSSE>=1); 10186 match(Set dst (CmpF3 src1 (LoadF src2))); 10187 effect(KILL cr); 10188 ins_cost(275); 10189 format %{ "UCOMISS $src1, $src2\n\t" 10190 "MOV $dst, #-1\n\t" 10191 "JP,s done\n\t" 10192 "JB,s done\n\t" 10193 "SETNE $dst\n\t" 10194 "MOVZB $dst, $dst\n" 10195 "done:" %} 10196 ins_encode %{ 10197 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10198 emit_cmpfp3(_masm, $dst$$Register); 10199 %} 10200 
ins_pipe( pipe_slow ); 10201 %} 10202 10203 // Spill to obtain 24-bit precision 10204 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10205 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10206 match(Set dst (SubF src1 src2)); 10207 10208 format %{ "FSUB $dst,$src1 - $src2" %} 10209 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10210 ins_encode( Push_Reg_FPR(src1), 10211 OpcReg_FPR(src2), 10212 Pop_Mem_FPR(dst) ); 10213 ins_pipe( fpu_mem_reg_reg ); 10214 %} 10215 // 10216 // This instruction does not round to 24-bits 10217 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10218 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10219 match(Set dst (SubF dst src)); 10220 10221 format %{ "FSUB $dst,$src" %} 10222 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10223 ins_encode( Push_Reg_FPR(src), 10224 OpcP, RegOpc(dst) ); 10225 ins_pipe( fpu_reg_reg ); 10226 %} 10227 10228 // Spill to obtain 24-bit precision 10229 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10230 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10231 match(Set dst (AddF src1 src2)); 10232 10233 format %{ "FADD $dst,$src1,$src2" %} 10234 opcode(0xD8, 0x0); /* D8 C0+i */ 10235 ins_encode( Push_Reg_FPR(src2), 10236 OpcReg_FPR(src1), 10237 Pop_Mem_FPR(dst) ); 10238 ins_pipe( fpu_mem_reg_reg ); 10239 %} 10240 // 10241 // This instruction does not round to 24-bits 10242 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10243 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10244 match(Set dst (AddF dst src)); 10245 10246 format %{ "FLD $src\n\t" 10247 "FADDp $dst,ST" %} 10248 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10249 ins_encode( Push_Reg_FPR(src), 10250 OpcP, RegOpc(dst) ); 10251 ins_pipe( fpu_reg_reg ); 10252 %} 10253 10254 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10255 predicate(UseSSE==0); 10256 match(Set dst (AbsF src)); 10257 ins_cost(100); 10258 format %{ "FABS" 
%} 10259 opcode(0xE1, 0xD9); 10260 ins_encode( OpcS, OpcP ); 10261 ins_pipe( fpu_reg_reg ); 10262 %} 10263 10264 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10265 predicate(UseSSE==0); 10266 match(Set dst (NegF src)); 10267 ins_cost(100); 10268 format %{ "FCHS" %} 10269 opcode(0xE0, 0xD9); 10270 ins_encode( OpcS, OpcP ); 10271 ins_pipe( fpu_reg_reg ); 10272 %} 10273 10274 // Cisc-alternate to addFPR_reg 10275 // Spill to obtain 24-bit precision 10276 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10277 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10278 match(Set dst (AddF src1 (LoadF src2))); 10279 10280 format %{ "FLD $src2\n\t" 10281 "FADD ST,$src1\n\t" 10282 "FSTP_S $dst" %} 10283 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10284 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10285 OpcReg_FPR(src1), 10286 Pop_Mem_FPR(dst) ); 10287 ins_pipe( fpu_mem_reg_mem ); 10288 %} 10289 // 10290 // Cisc-alternate to addFPR_reg 10291 // This instruction does not round to 24-bits 10292 instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10293 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10294 match(Set dst (AddF dst (LoadF src))); 10295 10296 format %{ "FADD $dst,$src" %} 10297 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10298 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10299 OpcP, RegOpc(dst) ); 10300 ins_pipe( fpu_reg_mem ); 10301 %} 10302 10303 // // Following two instructions for _222_mpegaudio 10304 // Spill to obtain 24-bit precision 10305 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10306 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10307 match(Set dst (AddF src1 src2)); 10308 10309 format %{ "FADD $dst,$src1,$src2" %} 10310 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10311 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10312 OpcReg_FPR(src2), 10313 Pop_Mem_FPR(dst) ); 10314 ins_pipe( 
fpu_mem_reg_mem ); 10315 %} 10316 10317 // Cisc-spill variant 10318 // Spill to obtain 24-bit precision 10319 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10320 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10321 match(Set dst (AddF src1 (LoadF src2))); 10322 10323 format %{ "FADD $dst,$src1,$src2 cisc" %} 10324 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10325 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10326 set_instruction_start, 10327 OpcP, RMopc_Mem(secondary,src1), 10328 Pop_Mem_FPR(dst) ); 10329 ins_pipe( fpu_mem_mem_mem ); 10330 %} 10331 10332 // Spill to obtain 24-bit precision 10333 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10334 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10335 match(Set dst (AddF src1 src2)); 10336 10337 format %{ "FADD $dst,$src1,$src2" %} 10338 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10339 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10340 set_instruction_start, 10341 OpcP, RMopc_Mem(secondary,src1), 10342 Pop_Mem_FPR(dst) ); 10343 ins_pipe( fpu_mem_mem_mem ); 10344 %} 10345 10346 10347 // Spill to obtain 24-bit precision 10348 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10349 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10350 match(Set dst (AddF src con)); 10351 format %{ "FLD $src\n\t" 10352 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10353 "FSTP_S $dst" %} 10354 ins_encode %{ 10355 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10356 __ fadd_s($constantaddress($con)); 10357 __ fstp_s(Address(rsp, $dst$$disp)); 10358 %} 10359 ins_pipe(fpu_mem_reg_con); 10360 %} 10361 // 10362 // This instruction does not round to 24-bits 10363 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10364 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10365 match(Set dst (AddF src con)); 10366 format %{ "FLD 
$src\n\t" 10367 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10368 "FSTP $dst" %} 10369 ins_encode %{ 10370 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10371 __ fadd_s($constantaddress($con)); 10372 __ fstp_d($dst$$reg); 10373 %} 10374 ins_pipe(fpu_reg_reg_con); 10375 %} 10376 10377 // Spill to obtain 24-bit precision 10378 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10379 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10380 match(Set dst (MulF src1 src2)); 10381 10382 format %{ "FLD $src1\n\t" 10383 "FMUL $src2\n\t" 10384 "FSTP_S $dst" %} 10385 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10386 ins_encode( Push_Reg_FPR(src1), 10387 OpcReg_FPR(src2), 10388 Pop_Mem_FPR(dst) ); 10389 ins_pipe( fpu_mem_reg_reg ); 10390 %} 10391 // 10392 // This instruction does not round to 24-bits 10393 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10394 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10395 match(Set dst (MulF src1 src2)); 10396 10397 format %{ "FLD $src1\n\t" 10398 "FMUL $src2\n\t" 10399 "FSTP_S $dst" %} 10400 opcode(0xD8, 0x1); /* D8 C8+i */ 10401 ins_encode( Push_Reg_FPR(src2), 10402 OpcReg_FPR(src1), 10403 Pop_Reg_FPR(dst) ); 10404 ins_pipe( fpu_reg_reg_reg ); 10405 %} 10406 10407 10408 // Spill to obtain 24-bit precision 10409 // Cisc-alternate to reg-reg multiply 10410 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10411 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10412 match(Set dst (MulF src1 (LoadF src2))); 10413 10414 format %{ "FLD_S $src2\n\t" 10415 "FMUL $src1\n\t" 10416 "FSTP_S $dst" %} 10417 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10418 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10419 OpcReg_FPR(src1), 10420 Pop_Mem_FPR(dst) ); 10421 ins_pipe( fpu_mem_reg_mem ); 10422 %} 10423 // 10424 // This instruction does not round to 24-bits 10425 // Cisc-alternate to 
reg-reg multiply 10426 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10427 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10428 match(Set dst (MulF src1 (LoadF src2))); 10429 10430 format %{ "FMUL $dst,$src1,$src2" %} 10431 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10432 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10433 OpcReg_FPR(src1), 10434 Pop_Reg_FPR(dst) ); 10435 ins_pipe( fpu_reg_reg_mem ); 10436 %} 10437 10438 // Spill to obtain 24-bit precision 10439 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10440 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10441 match(Set dst (MulF src1 src2)); 10442 10443 format %{ "FMUL $dst,$src1,$src2" %} 10444 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10445 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10446 set_instruction_start, 10447 OpcP, RMopc_Mem(secondary,src1), 10448 Pop_Mem_FPR(dst) ); 10449 ins_pipe( fpu_mem_mem_mem ); 10450 %} 10451 10452 // Spill to obtain 24-bit precision 10453 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10454 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10455 match(Set dst (MulF src con)); 10456 10457 format %{ "FLD $src\n\t" 10458 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10459 "FSTP_S $dst" %} 10460 ins_encode %{ 10461 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10462 __ fmul_s($constantaddress($con)); 10463 __ fstp_s(Address(rsp, $dst$$disp)); 10464 %} 10465 ins_pipe(fpu_mem_reg_con); 10466 %} 10467 // 10468 // This instruction does not round to 24-bits 10469 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10470 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10471 match(Set dst (MulF src con)); 10472 10473 format %{ "FLD $src\n\t" 10474 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10475 "FSTP $dst" %} 10476 ins_encode %{ 10477 __ 
fld_s($src$$reg - 1); // FLD ST(i-1) 10478 __ fmul_s($constantaddress($con)); 10479 __ fstp_d($dst$$reg); 10480 %} 10481 ins_pipe(fpu_reg_reg_con); 10482 %} 10483 10484 10485 // 10486 // MACRO1 -- subsume unshared load into mulFPR 10487 // This instruction does not round to 24-bits 10488 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10489 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10490 match(Set dst (MulF (LoadF mem1) src)); 10491 10492 format %{ "FLD $mem1 ===MACRO1===\n\t" 10493 "FMUL ST,$src\n\t" 10494 "FSTP $dst" %} 10495 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10496 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10497 OpcReg_FPR(src), 10498 Pop_Reg_FPR(dst) ); 10499 ins_pipe( fpu_reg_reg_mem ); 10500 %} 10501 // 10502 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10503 // This instruction does not round to 24-bits 10504 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10505 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10506 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10507 ins_cost(95); 10508 10509 format %{ "FLD $mem1 ===MACRO2===\n\t" 10510 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10511 "FADD ST,$src2\n\t" 10512 "FSTP $dst" %} 10513 opcode(0xD9); /* LoadF D9 /0 */ 10514 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10515 FMul_ST_reg(src1), 10516 FAdd_ST_reg(src2), 10517 Pop_Reg_FPR(dst) ); 10518 ins_pipe( fpu_reg_mem_reg_reg ); 10519 %} 10520 10521 // MACRO3 -- addFPR a mulFPR 10522 // This instruction does not round to 24-bits. It is a '2-address' 10523 // instruction in that the result goes back to src2. This eliminates 10524 // a move from the macro; possibly the register allocator will have 10525 // to add it back (and maybe not). 
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: XMM has no FPREM, so bounce both operands through
// the stack onto the x87 FPU, loop on FPREM, and move the result back.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted.  Please keep it that way!
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is not already at top-of-stack, load it there first (FST_S
    // only stores from ST(0)); FPR1L can be stored directly.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 is the CVTTSD2SI "invalid" indefinite value; only then
    // take the slow path through the runtime wrapper.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 in EDX:EAX is the FIST "invalid" indefinite
    // value; only then take the slow path through the runtime wrapper.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 is the CVTTSS2SI "invalid" indefinite value; only then
    // take the slow path (d2i_wrapper handles the float case too).
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 in EDX:EAX is the FIST "invalid" indefinite
    // value; only then take the slow path through the runtime wrapper.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
11058 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ 11059 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); 11060 match(Set dst (ConvI2F src)); 11061 format %{ "FILD $src\n\t" 11062 "FSTP $dst" %} 11063 11064 opcode(0xDB, 0x0); /* DB /0 */ 11065 ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); 11066 ins_pipe( fpu_reg_mem ); 11067 %} 11068 11069 // In 24-bit mode, force exponent rounding by storing back out 11070 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ 11071 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11072 match(Set dst (ConvI2F src)); 11073 ins_cost(200); 11074 format %{ "FILD $src\n\t" 11075 "FSTP_S $dst" %} 11076 opcode(0xDB, 0x0); /* DB /0 */ 11077 ins_encode( Push_Mem_I(src), 11078 Pop_Mem_FPR(dst)); 11079 ins_pipe( fpu_mem_mem ); 11080 %} 11081 11082 // In 24-bit mode, force exponent rounding by storing back out 11083 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ 11084 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11085 match(Set dst (ConvI2F (LoadI mem))); 11086 ins_cost(200); 11087 format %{ "FILD $mem\n\t" 11088 "FSTP_S $dst" %} 11089 opcode(0xDB); /* DB /0 */ 11090 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11091 Pop_Mem_FPR(dst)); 11092 ins_pipe( fpu_mem_mem ); 11093 %} 11094 11095 // This instruction does not round to 24-bits 11096 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ 11097 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11098 match(Set dst (ConvI2F src)); 11099 format %{ "FILD $src\n\t" 11100 "FSTP $dst" %} 11101 opcode(0xDB, 0x0); /* DB /0 */ 11102 ins_encode( Push_Mem_I(src), 11103 Pop_Reg_FPR(dst)); 11104 ins_pipe( fpu_reg_mem ); 11105 %} 11106 11107 // This instruction does not round to 24-bits 11108 instruct convI2FPR_mem(regFPR dst, memory mem) %{ 11109 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11110 match(Set dst (ConvI2F (LoadI 
mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Convert an int to a float via MOVD + CVTDQ2PS (selected by UseXmmI2F).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend convert int to long: copy to both halves, then SAR the
// high word by 31 to replicate the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Convert long to double, x87 (UseSSE<=1): push both halves, FILD the
// 64-bit integer from the stack, pop result to a stack slot with D-rounding.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to double, XMM result (UseSSE>=2): go through the x87
// FILD/FSTP pair on the stack, then load the rounded result with MOVSD.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to float, XMM result (UseSSE>=1); FSTP_S performs the
// single-precision rounding before the MOVSS load.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Convert long to float, x87 stack-slot result (no SSE predicate).
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF,
0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Truncate long to int: copy only the low 32-bit word.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Reinterpret a float stack slot as an int register (raw 32-bit move,
// no conversion).
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Spill an x87 float register to an int stack slot (UseSSE==0 path).
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Store an XMM float to an int stack slot (UseSSE>=1 path).
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Move XMM float bits directly into a GP register (UseSSE>=2 has MOVD).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret an int register as a float stack slot (raw 32-bit store).
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// Load an int stack slot into an x87 float register (UseSSE==0 path).
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load an int stack slot into an XMM register (UseSSE>=1 path).
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Move GP register bits directly into an XMM register (UseSSE>=2).
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret a double stack slot as a long register pair: two 32-bit
// loads, low word from $src and high word from $src+4.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// Spill an x87 double register to a long stack slot (UseSSE<=1 path).
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Store an XMM double to a long stack slot (UseSSE>=2 path).
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Move XMM double bits into a long register pair: MOVD the low word,
// PSHUFLW swaps the halves into $tmp so the high word can be MOVD'd out.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret a long register pair as a double stack slot: two 32-bit
// stores, low word at $dst and high word at $dst+4.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// Load a long stack slot into an x87 double register (UseSSE<=1 path).
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// Load a long stack slot into an XMM register; MOVSD clears the upper
// half (selected by UseXmmLoadAndClearUpper).
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Same load when partial-register MOVLPD is preferred
// (!UseXmmLoadAndClearUpper); upper half of $dst is left unchanged.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Move a long register pair into an XMM double: MOVD each half, then
// PUNPCKLDQ interleaves them into the low 64 bits of $dst.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!UseFastStosb);
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
  format %{ "XOR EAX,EAX\t# ClearArray:\n\t"
            "SHL ECX,1\t# Convert doublewords to words\n\t"
            "REP STOS\t# store EAX into [EDI++] while ECX--" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct
rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(UseFastStosb);
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
  format %{ "XOR EAX,EAX\t# ClearArray:\n\t"
            "SHL ECX,3\t# Convert doublewords to bytes\n\t"
            "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Intrinsic string comparison; operands are pinned to the registers the
// macro-assembler helper expects, result in EAX.
instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                        eAXRegI result, regD tmp1, eFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
                          $cnt$$Register, $result$$Register, $tmp3$$Register,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                            eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// General (variable-length substring) indexOf; passes -1 so the helper
// reads the actual count from $cnt2.
instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                        eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                      regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
                          $tmp3$$Register, $result$$Register, $tmp4$$Register,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Compare register with immediate; Con8or32 picks the short (imm8) or
// long (imm32) encoding.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST reg,reg (shorter than CMP reg,0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Test selected bits: (src & con) compared against zero without a
// separate AND.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Test register against a memory operand.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare against zero via TEST reg,reg.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7); /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 is computed here but does not appear to be used
    // below in this encoding — confirm whether it is dead.
    int m1 = (strd > 0) ?
1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Loop-end branch on carry-flag-only unsigned comparison.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{
"J$cop,u $labl" %} 12009 size(6); 12010 ins_encode %{ 12011 Label* L = $labl$$label; 12012 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12013 %} 12014 ins_pipe(pipe_jcc); 12015 %} 12016 12017 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12018 match(If cop cmp); 12019 effect(USE labl); 12020 12021 ins_cost(200); 12022 format %{ "J$cop,u $labl" %} 12023 size(6); 12024 ins_encode %{ 12025 Label* L = $labl$$label; 12026 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12027 %} 12028 ins_pipe(pipe_jcc); 12029 %} 12030 12031 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12032 match(If cop cmp); 12033 effect(USE labl); 12034 12035 ins_cost(200); 12036 format %{ $$template 12037 if ($cop$$cmpcode == Assembler::notEqual) { 12038 $$emit$$"JP,u $labl\n\t" 12039 $$emit$$"J$cop,u $labl" 12040 } else { 12041 $$emit$$"JP,u done\n\t" 12042 $$emit$$"J$cop,u $labl\n\t" 12043 $$emit$$"done:" 12044 } 12045 %} 12046 ins_encode %{ 12047 Label* l = $labl$$label; 12048 if ($cop$$cmpcode == Assembler::notEqual) { 12049 __ jcc(Assembler::parity, *l, false); 12050 __ jcc(Assembler::notEqual, *l, false); 12051 } else if ($cop$$cmpcode == Assembler::equal) { 12052 Label done; 12053 __ jccb(Assembler::parity, done); 12054 __ jcc(Assembler::equal, *l, false); 12055 __ bind(done); 12056 } else { 12057 ShouldNotReachHere(); 12058 } 12059 %} 12060 ins_pipe(pipe_jcc); 12061 %} 12062 12063 // ============================================================================ 12064 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass 12065 // array for an instance of the superklass. Set a hidden internal cache on a 12066 // hit (cache is checked with exposed code in gen_subtype_check()). Return 12067 // NZ for a miss or zero for a hit. The encoding ALSO sets flags. 
// Partial subtype check: scan the secondary-supers array of $sub for $super.
// On a hit the secondary_super_cache is updated and $result (EDI) is zeroed;
// on a miss $result is left non-zero. Kills ECX (scan count) and flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Same scan as above, but the result is consumed only through the flags
// (compare against NULL); EDI need not be zeroed on a hit.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Unordered-compare branch: a parity check must accompany the main Jcc so
// that the NaN (PF=1) case is routed correctly for both EQ and NE tests.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Materialize the three-way result of a long compare (-1/0/+1) in $dst,
// comparing high halves first, then low halves unsigned.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}   // Sign bit of the high half decides LT/GE vs zero
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or. By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$. Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic. It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Elide a reload that immediately follows a spill of the same register to
// the same stack slot: instruction 0 (loadI, the root) reads back what
// instruction 1 (the preceding storeI) just wrote, so the pair is replaced
// by the store alone — the register already holds the value.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.